From 8ce9aeee4e84ae43cbcdd0a56ceb6d3e1e19e967 Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Wed, 20 Nov 2024 23:59:09 +0100 Subject: [PATCH 1/9] Disallow excessive GOMAXPROCS (#368) --- node/config/engine.go | 2 ++ node/main.go | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/node/config/engine.go b/node/config/engine.go index 3e15a6f..3078f31 100644 --- a/node/config/engine.go +++ b/node/config/engine.go @@ -34,4 +34,6 @@ type EngineConfig struct { // Values used only for testing – do not override these in production, your // node will get kicked out Difficulty uint32 `yaml:"difficulty"` + // Whether to allow GOMAXPROCS values above the number of physical cores. + AllowExcessiveGOMAXPROCS bool `yaml:"allowExcessiveGOMAXPROCS"` } diff --git a/node/main.go b/node/main.go index 0003832..c03fde0 100644 --- a/node/main.go +++ b/node/main.go @@ -394,6 +394,12 @@ func main() { nodeConfig.Engine.DataWorkerMemoryLimit = 1792 * 1024 * 1024 // 1.75GiB } if len(nodeConfig.Engine.DataWorkerMultiaddrs) == 0 { + maxProcs, numCPU := runtime.GOMAXPROCS(0), runtime.NumCPU() + if maxProcs > numCPU && !nodeConfig.Engine.AllowExcessiveGOMAXPROCS { + fmt.Println("GOMAXPROCS is set higher than the number of available CPUs.") + os.Exit(1) + } + nodeConfig.Engine.DataWorkerCount = qruntime.WorkerCount( nodeConfig.Engine.DataWorkerCount, true, ) From 4d3ac60e2b1ec665781d880c8bc05ec23a2aca4f Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Wed, 20 Nov 2024 23:59:24 +0100 Subject: [PATCH 2/9] Increase default sync timeout to 4 seconds (#369) --- node/consensus/data/consensus_frames.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/consensus/data/consensus_frames.go b/node/consensus/data/consensus_frames.go index 82e4514..81b081b 100644 --- a/node/consensus/data/consensus_frames.go +++ b/node/consensus/data/consensus_frames.go @@ -20,7 +20,7 @@ import ( "source.quilibrium.com/quilibrium/monorepo/node/protobufs" ) -const defaultSyncTimeout = 2 * time.Second +const defaultSyncTimeout = 4 * time.Second func (e *DataClockConsensusEngine) collect( enqueuedFrame *protobufs.ClockFrame, From cbc405a3a0ddee90d3e7055832de4a1e785bb6aa Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:05:10 +0100 Subject: [PATCH 3/9] Refactor peer pinging to target individual connections (#370) --- go-libp2p/p2p/protocol/ping/ping.go | 62 ++++++++++++++++++++++++----- node/p2p/internal/peer_monitor.go | 35 ++++++---------- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/go-libp2p/p2p/protocol/ping/ping.go b/go-libp2p/p2p/protocol/ping/ping.go index f95c944..1dba956 100644 --- a/go-libp2p/p2p/protocol/ping/ping.go +++ b/go-libp2p/p2p/protocol/ping/ping.go @@ -15,6 +15,9 @@ import ( "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" + "github.com/libp2p/go-libp2p/core/peerstore" + "github.com/libp2p/go-libp2p/core/protocol" + mstream "github.com/multiformats/go-multistream" ) var log = logging.Logger("ping") @@ -114,14 +117,7 @@ func pingError(err error) chan Result { return ch } -// Ping pings the remote peer until the context is canceled, returning a stream -// of RTTs or errors. -func Ping(ctx context.Context, h host.Host, p peer.ID) <-chan Result { - s, err := h.NewStream(network.WithAllowLimitedConn(ctx, "ping"), p, ID) - if err != nil { - return pingError(err) - } - +func pingStream(ctx context.Context, ps peerstore.Peerstore, s network.Stream) <-chan Result { if err := s.Scope().SetService(ServiceName); err != nil { log.Debugf("error attaching stream to ping service: %s", err) s.Reset() @@ -153,7 +149,7 @@ func Ping(ctx context.Context, h host.Host, p peer.ID) <-chan Result { // No error, record the RTT. if res.Error == nil { - h.Peerstore().RecordLatency(p, res.RTT) + ps.RecordLatency(s.Conn().RemotePeer(), res.RTT) } select { @@ -175,6 +171,54 @@ func Ping(ctx context.Context, h host.Host, p peer.ID) <-chan Result { return out } +// PingConn pings the peer via the connection until the context is canceled, returning a stream +// of RTTs or errors. +func PingConn(ctx context.Context, ps peerstore.Peerstore, conn network.Conn) <-chan Result { + s, err := conn.NewStream(ctx) + if err != nil { + return pingError(err) + } + var selected protocol.ID + var errCh chan error = make(chan error, 1) + go func() { + var err error + selected, err = mstream.SelectOneOf([]protocol.ID{ID}, s) + select { + case <-ctx.Done(): + case errCh <- err: + } + }() + select { + case <-ctx.Done(): + _ = s.Reset() + return pingError(ctx.Err()) + case err := <-errCh: + if err != nil { + _ = s.Reset() + return pingError(err) + } + } + if err := s.SetProtocol(selected); err != nil { + _ = s.Reset() + return pingError(err) + } + if err := ps.AddProtocols(conn.RemotePeer(), selected); err != nil { + _ = s.Reset() + return pingError(err) + } + return pingStream(ctx, ps, s) +} + +// Ping pings the remote peer until the context is canceled, returning a stream +// of RTTs or errors. +func Ping(ctx context.Context, h host.Host, p peer.ID) <-chan Result { + s, err := h.NewStream(network.WithAllowLimitedConn(ctx, "ping"), p, ID) + if err != nil { + return pingError(err) + } + return pingStream(ctx, h.Peerstore(), s) +} + func ping(s network.Stream, randReader io.Reader) (time.Duration, error) { if err := s.Scope().ReserveMemory(2*PingSize, network.ReservationPriorityAlways); err != nil { log.Debugf("error reserving memory for ping stream: %s", err) diff --git a/node/p2p/internal/peer_monitor.go b/node/p2p/internal/peer_monitor.go index c40c775..78a412b 100644 --- a/node/p2p/internal/peer_monitor.go +++ b/node/p2p/internal/peer_monitor.go @@ -7,7 +7,6 @@ import ( "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" - "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/p2p/protocol/ping" "go.uber.org/zap" ) @@ -19,7 +18,7 @@ type peerMonitor struct { attempts int } -func (pm *peerMonitor) pingOnce(ctx context.Context, logger *zap.Logger, id peer.ID) bool { +func (pm *peerMonitor) pingOnce(ctx context.Context, logger *zap.Logger, conn network.Conn) bool { pingCtx, cancel := context.WithTimeout(ctx, pm.timeout) defer cancel() select { @@ -27,7 +26,7 @@ func (pm *peerMonitor) pingOnce(ctx context.Context, logger *zap.Logger, id peer case <-pingCtx.Done(): logger.Debug("ping timeout") return false - case res := <-ping.Ping(pingCtx, pm.h, id): + case res := <-ping.PingConn(pingCtx, pm.h.Peerstore(), conn): if res.Error != nil { logger.Debug("ping error", zap.Error(res.Error)) return false @@ -37,43 +36,35 @@ func (pm *peerMonitor) pingOnce(ctx context.Context, logger *zap.Logger, id peer return true } -func (pm *peerMonitor) ping(ctx context.Context, logger *zap.Logger, wg *sync.WaitGroup, id peer.ID) { +func (pm *peerMonitor) ping(ctx context.Context, logger *zap.Logger, wg *sync.WaitGroup, conn network.Conn) { defer wg.Done() - var conns []network.Conn for i := 0; i < pm.attempts; i++ { - // There are no fine grained semantics in libp2p that would allow us to 'ping via - // a specific connection'. We can only ping a peer, which will attempt to open a stream via a connection. - // As such, we save a snapshot of the connections that were potentially in use before - // the ping, and close them if the ping fails. If new connections occur between the snapshot - // and the ping, they will not be closed, and will be pinged in the next iteration. - conns = pm.h.Network().ConnsToPeer(id) - if pm.pingOnce(ctx, logger, id) { + if pm.pingOnce(ctx, logger, conn) { + return + } + if conn.IsClosed() { return } } - for _, conn := range conns { - _ = conn.Close() - } + _ = conn.Close() } func (pm *peerMonitor) run(ctx context.Context, logger *zap.Logger) { - // Do not allow the pings to dial new connections. Adding new peers is a separate - // process and should not be done during the ping process. - ctx = network.WithNoDial(ctx, "monitor peers") for { select { case <-ctx.Done(): return case <-time.After(pm.period): - // This is once again a snapshot of the connected peers at the time of the ping. If new peers - // are added between the snapshot and the ping, they will be pinged in the next iteration. peers := pm.h.Network().Peers() logger.Debug("pinging connected peers", zap.Int("peer_count", len(peers))) wg := &sync.WaitGroup{} for _, id := range peers { logger := logger.With(zap.String("peer_id", id.String())) - wg.Add(1) - go pm.ping(ctx, logger, wg, id) + for _, conn := range pm.h.Network().ConnsToPeer(id) { + logger := logger.With(zap.String("connection_id", conn.ID())) + wg.Add(1) + go pm.ping(ctx, logger, wg, conn) + } } wg.Wait() logger.Debug("pinged connected peers") From 803cf4b7b34a7f859ecddf99dc508c13a1466054 Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:07:28 +0100 Subject: [PATCH 4/9] Close direct channels if the connection is fresh (#371) --- node/p2p/blossomsub.go | 68 ++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/node/p2p/blossomsub.go b/node/p2p/blossomsub.go index d079ab4..67643a8 100644 --- a/node/p2p/blossomsub.go +++ b/node/p2p/blossomsub.go @@ -838,53 +838,77 @@ func (b *BlossomSub) StartDirectChannelListener( return errors.Wrap(server.Serve(bind), "start direct channel listener") } -func (b *BlossomSub) GetDirectChannel(key []byte, purpose string) ( - dialCtx *grpc.ClientConn, - err error, +type extraCloseConn struct { + net.Conn + extraClose func() +} + +func (c *extraCloseConn) Close() error { + err := c.Conn.Close() + c.extraClose() + return err +} + +func (b *BlossomSub) GetDirectChannel(peerID []byte, purpose string) ( + cc *grpc.ClientConn, err error, ) { // Kind of a weird hack, but gostream can induce panics if the peer drops at // the time of connection, this avoids the problem. defer func() { if r := recover(); r != nil { - dialCtx = nil + cc = nil err = errors.New("connection failed") } }() + id := peer.ID(peerID) + // Open question: should we prefix this so a node can run both in mainnet and // testnet? Feels like a bad idea and would be preferable to discourage. - dialCtx, err = qgrpc.DialContext( + cc, err = qgrpc.DialContext( b.ctx, - base58.Encode(key), - grpc.WithDialer( - func(peerIdStr string, timeout time.Duration) (net.Conn, error) { - subCtx, subCtxCancel := context.WithTimeout(b.ctx, timeout) - defer subCtxCancel() - - id, err := peer.Decode(peerIdStr) - if err != nil { - return nil, errors.Wrap(err, "dial context") + "passthrough:///", + grpc.WithContextDialer( + func(ctx context.Context, _ string) (net.Conn, error) { + // If we are not already connected to the peer, we will manually dial it + // before opening the direct channel. We will close the peer connection + // when the direct channel is closed. + alreadyConnected := false + switch connectedness := b.h.Network().Connectedness(id); connectedness { + case network.Connected, network.Limited: + alreadyConnected = true + default: + if err := b.h.Connect(ctx, peer.AddrInfo{ID: id}); err != nil { + return nil, errors.Wrap(err, "connect") + } } - c, err := gostream.Dial( - subCtx, + network.WithNoDial(ctx, "direct-channel"), b.h, - peer.ID(key), + id, protocol.ID( - "/p2p/direct-channel/"+peer.ID(id).String()+purpose, + "/p2p/direct-channel/"+id.String()+purpose, ), ) - - return c, errors.Wrap(err, "dial context") + if err != nil { + return nil, errors.Wrap(err, "dial direct channel") + } + if alreadyConnected { + return c, nil + } + return &extraCloseConn{ + Conn: c, + extraClose: func() { _ = b.h.Network().ClosePeer(id) }, + }, nil }, ), grpc.WithTransportCredentials(insecure.NewCredentials()), ) if err != nil { - return nil, errors.Wrap(err, "get direct channel") + return nil, errors.Wrap(err, "dial context") } - return dialCtx, nil + return cc, nil } func (b *BlossomSub) GetPublicKey() []byte { From 883f0605ae70a28de6a05be6bc8cb06db2aa3772 Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:08:19 +0100 Subject: [PATCH 5/9] Enable AutoNATv1 and NATPortMap (#372) --- node/p2p/blossomsub.go | 136 ++++++++++------------------------------- 1 file changed, 33 insertions(+), 103 deletions(-) diff --git a/node/p2p/blossomsub.go b/node/p2p/blossomsub.go index 67643a8..818260e 100644 --- a/node/p2p/blossomsub.go +++ b/node/p2p/blossomsub.go @@ -5,15 +5,10 @@ import ( "context" "crypto/rand" "encoding/hex" - "encoding/json" "fmt" - "io" "math/big" "math/bits" "net" - "net/http" - "strconv" - "strings" "sync" "time" @@ -21,12 +16,14 @@ import ( dht "github.com/libp2p/go-libp2p-kad-dht" libp2pconfig "github.com/libp2p/go-libp2p/config" "github.com/libp2p/go-libp2p/core/crypto" + "github.com/libp2p/go-libp2p/core/event" "github.com/libp2p/go-libp2p/core/host" "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/p2p/discovery/routing" "github.com/libp2p/go-libp2p/p2p/discovery/util" + "github.com/libp2p/go-libp2p/p2p/host/eventbus" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" routedhost "github.com/libp2p/go-libp2p/p2p/host/routed" "github.com/libp2p/go-libp2p/p2p/net/connmgr" @@ -35,7 +32,6 @@ import ( "github.com/mr-tron/base58" ma "github.com/multiformats/go-multiaddr" madns "github.com/multiformats/go-multiaddr-dns" - mn "github.com/multiformats/go-multiaddr/net" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "go.uber.org/zap" @@ -188,6 +184,8 @@ func NewBlossomSub( opts := []libp2pconfig.Option{ libp2p.ListenAddrStrings(p2pConfig.ListenMultiaddr), + libp2p.EnableNATService(), + libp2p.NATPortMap(), } isBootstrapPeer := false @@ -315,6 +313,35 @@ func NewBlossomSub( logger.Info("established peer id", zap.String("peer_id", h.ID().String())) + reachabilitySub, err := h.EventBus().Subscribe(&event.EvtLocalReachabilityChanged{}, eventbus.Name("blossomsub")) + if err != nil { + panic(err) + } + go func() { + defer reachabilitySub.Close() + logger := logger.Named("reachability") + for { + select { + case <-ctx.Done(): + return + case evt, ok := <-reachabilitySub.Out(): + if !ok { + return + } + switch state := evt.(event.EvtLocalReachabilityChanged).Reachability; state { + case network.ReachabilityPublic: + logger.Info("node is externally reachable") + case network.ReachabilityPrivate: + logger.Info("node is not externally reachable") + case network.ReachabilityUnknown: + logger.Info("node reachability is unknown") + default: + logger.Debug("unknown reachability state", zap.Any("state", state)) + } + } + } + }() + kademliaDHT := initDHT( ctx, logger, @@ -328,8 +355,6 @@ func NewBlossomSub( routingDiscovery := routing.NewRoutingDiscovery(kademliaDHT) util.Advertise(ctx, routingDiscovery, getNetworkNamespace(p2pConfig.Network)) - verifyReachability(p2pConfig) - minBootstrapPeers := min(len(bootstrappers), p2pConfig.MinBootstrapPeers) bootstrap := internal.NewPeerConnector( ctx, @@ -921,101 +946,6 @@ func (b *BlossomSub) SignMessage(msg []byte) ([]byte, error) { return sig, errors.Wrap(err, "sign message") } -type ReachabilityRequest struct { - Port uint16 `json:"port"` - Type string `json:"type"` -} - -type ReachabilityResponse struct { - Reachable bool `json:"reachable"` - Error string `json:"error"` -} - -func verifyReachability(cfg *config.P2PConfig) bool { - a, err := ma.NewMultiaddr(cfg.ListenMultiaddr) - if err != nil { - return false - } - - transport, addr, err := mn.DialArgs(a) - if err != nil { - return false - } - - addrparts := strings.Split(addr, ":") - if len(addrparts) != 2 { - return false - } - - port, err := strconv.ParseUint(addrparts[1], 10, 0) - if err != nil { - return false - } - - if !strings.Contains(transport, "tcp") { - transport = "quic" - } else { - transport = "tcp" - } - - req := &ReachabilityRequest{ - Port: uint16(port), - Type: transport, - } - - b, err := json.Marshal(req) - if err != nil { - return false - } - - resp, err := http.Post( - "https://rpc.quilibrium.com/connectivity-check", - "application/json", - bytes.NewBuffer(b), - ) - if err != nil { - fmt.Println("Reachability check not currently available, skipping test.") - return true - } - defer resp.Body.Close() - - if resp.StatusCode != 200 { - fmt.Println("Reachability check not currently available, skipping test.") - return true - } - - bodyBytes, err := io.ReadAll(resp.Body) - if err != nil { - fmt.Println("Reachability check not currently available, skipping test.") - return true - } - - r := &ReachabilityResponse{} - err = json.Unmarshal(bodyBytes, r) - if err != nil { - fmt.Println("Reachability check not currently available, skipping test.") - return true - } - - if r.Error != "" { - fmt.Println("Reachability check failed: " + r.Error) - if transport == "quic" { - fmt.Println("WARNING!") - fmt.Println("WARNING!") - fmt.Println("WARNING!") - fmt.Println("You failed reachability with QUIC enabled. Consider switching to TCP") - fmt.Println("WARNING!") - fmt.Println("WARNING!") - fmt.Println("WARNING!") - time.Sleep(5 * time.Second) - } - return false - } - - fmt.Println("Node passed reachability check.") - return true -} - func withDefaults(p2pConfig *config.P2PConfig) *config.P2PConfig { cfg := *p2pConfig p2pConfig = &cfg From 4917eba8795cd4a17b0b0aab16d53d6761703d79 Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:09:20 +0100 Subject: [PATCH 6/9] Add BlosssomSub Grafana dashboard (#367) --- dashboards/grafana/BlossomSub.json | 1343 ++++++++++++++++++++++++++++ 1 file changed, 1343 insertions(+) create mode 100644 dashboards/grafana/BlossomSub.json diff --git a/dashboards/grafana/BlossomSub.json b/dashboards/grafana/BlossomSub.json new file mode 100644 index 0000000..119c283 --- /dev/null +++ b/dashboards/grafana/BlossomSub.json @@ -0,0 +1,1343 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "11.2.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 5, + "panels": [], + "title": "Messages", + "type": "row" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask) (rate(blossomsub_deliver_message_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Message delivery rate", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask) (rate(blossomsub_validate_message_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Message validate rate", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask, reason) (rate(blossomsub_reject_message_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "{{bitmask}} - {{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Message reject rate", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask) (rate(blossomsub_duplicate_message_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Message duplicate rate", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements$1" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 8, + "panels": [], + "title": "Meshes", + "type": "row" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask) (blossomsub_graft_total{job=~\"$job\", instance=~\"$host\"} - blossomsub_prune_total{job=~\"$job\", instance=~\"$host\"})", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Peer count", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Frames Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAA(.*)", + "renamePattern": "Data Frames Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Token Requests Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA(.*)", + "renamePattern": "Data Token Requests Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Peer Announcements Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAA(.*)", + "renamePattern": "Data Peer Announcements Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask) (rate(blossomsub_graft_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Graft rate", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Frames Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAA(.*)", + "renamePattern": "Data Frames Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Token Requests Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA(.*)", + "renamePattern": "Data Token Requests Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Peer Announcements Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAA(.*)", + "renamePattern": "Data Peer Announcements Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + } + ], + "type": "timeseries" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "avg by (bitmask) (rate(blossomsub_prune_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Prune rate", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Frames Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAA(.*)", + "renamePattern": "Data Frames Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQA(.*)", + "renamePattern": "Data Frames Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Token Requests Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAA(.*)", + "renamePattern": "Data Token Requests Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA(.*)", + "renamePattern": "Data Token Requests Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Data Peer Announcements Shard 1$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAAA(.*)", + "renamePattern": "Data Peer Announcements Shard 2$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAA(.*)", + "renamePattern": "Data Peer Announcements Shard 3$1" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA(.*)", + "renamePattern": "Master Frames$1" + } + } + ], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 6, + "panels": [], + "title": "RPCs", + "type": "row" + }, + { + "datasource": { + "default": false, + "type": "prometheus", + "uid": "${datasource}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "pps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "min", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true, + "sortBy": "Name", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(blossomsub_send_rpc_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval])", + "instant": false, + "legendFormat": "Sent", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "editorMode": "code", + "expr": "rate(blossomsub_recv_rpc_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Received", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(blossomsub_drop_rpc_total{job=~\"$job\", instance=~\"$host\"}[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "Dropped", + "range": true, + "refId": "C" + } + ], + "title": "RPC rate", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "label": "Datasource", + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(blossomsub_add_peer_total,job)", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(blossomsub_add_peer_total,job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(blossomsub_add_peer_total{job=\"$job\"},instance)", + "hide": 0, + "includeAll": false, + "label": "Host", + "multi": false, + "name": "host", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(blossomsub_add_peer_total{job=\"$job\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "BlossomSub", + "uid": "ee47pcfax962ob", + "version": 29, + "weekStart": "" +} \ No newline at end of file From b798de58717b9c0ed502fbcd111a3211f3f0390f Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:12:57 +0100 Subject: [PATCH 7/9] Trigger sync on ahead peer (#366) --- .../data/data_clock_consensus_engine.go | 2 +- node/consensus/data/main_data_loop.go | 21 +++++++++---------- node/consensus/data/message_handler.go | 7 +++++++ node/consensus/data/token_handle_mint_test.go | 3 ++- node/p2p/blossomsub.go | 10 +++++++-- node/p2p/pubsub.go | 3 ++- 6 files changed, 30 insertions(+), 16 deletions(-) diff --git a/node/consensus/data/data_clock_consensus_engine.go b/node/consensus/data/data_clock_consensus_engine.go index 0fee9ad..620fdf0 100644 --- a/node/consensus/data/data_clock_consensus_engine.go +++ b/node/consensus/data/data_clock_consensus_engine.go @@ -383,7 +383,7 @@ func (e *DataClockConsensusEngine) Start() <-chan error { } if currentHead.FrameNumber == lastHead.FrameNumber { currentBackoff = min(maxBackoff, currentBackoff+1) - _ = e.pubSub.DiscoverPeers() + _ = e.pubSub.DiscoverPeers(e.ctx) } else { currentBackoff = max(0, currentBackoff-1) lastHead = currentHead diff --git a/node/consensus/data/main_data_loop.go b/node/consensus/data/main_data_loop.go index 6e41286..dfba7b5 100644 --- a/node/consensus/data/main_data_loop.go +++ b/node/consensus/data/main_data_loop.go @@ -93,20 +93,19 @@ func (e *DataClockConsensusEngine) runSync() { case <-e.ctx.Done(): return case enqueuedFrame := <-e.requestSyncCh: + if enqueuedFrame == nil { + var err error + enqueuedFrame, err = e.dataTimeReel.Head() + if err != nil { + panic(err) + } + } + if err := e.pubSub.Bootstrap(e.ctx); err != nil { + e.logger.Error("could not bootstrap", zap.Error(err)) + } if _, err := e.collect(enqueuedFrame); err != nil { e.logger.Error("could not collect", zap.Error(err)) } - case <-time.After(20 * time.Second): - if e.GetFrameProverTries()[0].Contains(e.provingKeyAddress) { - continue - } - head, err := e.dataTimeReel.Head() - if err != nil { - panic(err) - } - if _, err := e.collect(head); err != nil { - e.logger.Error("could not collect", zap.Error(err)) - } } } } diff --git a/node/consensus/data/message_handler.go b/node/consensus/data/message_handler.go index 93775cb..542210f 100644 --- a/node/consensus/data/message_handler.go +++ b/node/consensus/data/message_handler.go @@ -339,6 +339,13 @@ func (e *DataClockConsensusEngine) handleDataPeerListAnnounce( } e.peerMapMx.Unlock() + select { + case <-e.ctx.Done(): + return nil + case e.requestSyncCh <- nil: + default: + } + return nil } diff --git a/node/consensus/data/token_handle_mint_test.go b/node/consensus/data/token_handle_mint_test.go index 60af3e2..caa3e27 100644 --- a/node/consensus/data/token_handle_mint_test.go +++ b/node/consensus/data/token_handle_mint_test.go @@ -78,7 +78,8 @@ func (pubsub) GetPeerScore(peerId []byte) int64 { return 0 } func (pubsub) SetPeerScore(peerId []byte, score int64) {} func (pubsub) AddPeerScore(peerId []byte, scoreDelta int64) {} func (pubsub) Reconnect(peerId []byte) error { return nil } -func (pubsub) DiscoverPeers() error { return nil } +func (pubsub) Bootstrap(context.Context) error { return nil } +func (pubsub) DiscoverPeers(context.Context) error { return nil } type outputs struct { difficulty uint32 diff --git a/node/p2p/blossomsub.go b/node/p2p/blossomsub.go index 818260e..16145db 100644 --- a/node/p2p/blossomsub.go +++ b/node/p2p/blossomsub.go @@ -73,6 +73,7 @@ type BlossomSub struct { peerScore map[string]int64 peerScoreMx sync.Mutex network uint8 + bootstrap internal.PeerConnector discovery internal.PeerConnector } @@ -377,6 +378,7 @@ func NewBlossomSub( ), bootstrap, ) + bs.bootstrap = bootstrap discovery := internal.NewPeerConnector( ctx, @@ -756,8 +758,12 @@ func (b *BlossomSub) Reconnect(peerId []byte) error { return nil } -func (b *BlossomSub) DiscoverPeers() error { - return b.discovery.Connect(b.ctx) +func (b *BlossomSub) Bootstrap(ctx context.Context) error { + return b.bootstrap.Connect(ctx) +} + +func (b *BlossomSub) DiscoverPeers(ctx context.Context) error { + return b.discovery.Connect(ctx) } func (b *BlossomSub) GetPeerScore(peerId []byte) int64 { diff --git a/node/p2p/pubsub.go b/node/p2p/pubsub.go index 10278cc..d02acc4 100644 --- a/node/p2p/pubsub.go +++ b/node/p2p/pubsub.go @@ -52,6 +52,7 @@ type PubSub interface { SetPeerScore(peerId []byte, score int64) AddPeerScore(peerId []byte, scoreDelta int64) Reconnect(peerId []byte) error - DiscoverPeers() error + Bootstrap(ctx context.Context) error + DiscoverPeers(ctx context.Context) error GetNetwork() uint } From a66baac045c9f80acaff641f3b3191342792015d Mon Sep 17 00:00:00 2001 From: petricadaipegsp <155911522+petricadaipegsp@users.noreply.github.com> Date: Thu, 21 Nov 2024 00:13:21 +0100 Subject: [PATCH 8/9] Use public RPC if gRPC listen address is empty (#365) --- client/cmd/root.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/client/cmd/root.go b/client/cmd/root.go index dbbd034..292a816 100644 --- a/client/cmd/root.go +++ b/client/cmd/root.go @@ -111,7 +111,11 @@ var rootCmd = &cobra.Command{ } if publicRPC { - fmt.Println("gRPC not enabled, using light node") + fmt.Println("Public RPC enabled, using light node") + LightNode = true + } + if !LightNode && NodeConfig.ListenGRPCMultiaddr == "" { + fmt.Println("No ListenGRPCMultiaddr found in config, using light node") LightNode = true } }, From df77d408dc48523244064db63f154a42a9658782 Mon Sep 17 00:00:00 2001 From: Cassandra Heart Date: Tue, 19 Nov 2024 00:04:43 -0600 Subject: [PATCH 9/9] make it wait until one after --- node/consensus/data/main_data_loop.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node/consensus/data/main_data_loop.go b/node/consensus/data/main_data_loop.go index dfba7b5..2c15b18 100644 --- a/node/consensus/data/main_data_loop.go +++ b/node/consensus/data/main_data_loop.go @@ -69,7 +69,7 @@ func (e *DataClockConsensusEngine) runFramePruning() { } if head.FrameNumber < uint64(e.config.Engine.MaxFrames)+1 || - head.FrameNumber <= application.PROOF_FRAME_SENIORITY_REPAIR { + head.FrameNumber <= application.PROOF_FRAME_SENIORITY_REPAIR+1 { continue }