ceremonyclient/node/consensus/events/metrics.go
Cassandra Heart 12996487c3
v2.1.0.18 (#508)
* experiment: reject bad peer info messages

* v2.1.0.18 preview

* add tagged sync

* Add missing hypergraph changes

* small tweaks to sync

* allow local sync, use it for provers with workers

* missing file

* resolve build error

* resolve sync issue, remove raw sync

* resolve deletion promotion bug

* resolve sync abstraction leak from tree deletion changes

* rearrange prover sync

* remove pruning from sync

* restore removed sync flag

* fix: sync, event stream deadlock, heuristic scoring of better shards

* resolve hanging shutdown + pubsub proxy issue

* further bugfixes: sync (restore old leaf sync), pubsub shutdown, merge events

* fix: clean up rust ffi, background coverage events, and sync tweaks

* fix: linking issue for channel, connectivity test aggression, sync regression, join tests

* fix: disjoint sync, improper application of filter

* resolve sync/reel/validation deadlock

* adjust sync to handle no leaf edge cases, multi-path segment traversal

* use simpler sync

* faster, simpler sync with some debug extras

* migration to recalculate

* don't use batch

* square up the roots

* fix nil pointer

* fix: seniority calculation, sync race condition, migration

* make sync dumber

* fix: tree deletion issue

* fix: missing seniority merge request canonical serialization

* address issues from previous commit test

* stale workers should be cleared

* remove missing gap check

* rearrange collect, reduce sync logging noise

* fix: the disjoint leaf/branch sync case

* nuclear option on sync failures

* v2.1.0.18, finalized
2026-02-08 23:51:51 -06:00

160 lines
4.2 KiB
Go

package events
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"source.quilibrium.com/quilibrium/monorepo/types/consensus"
)
const (
	// metricsNamespace is the Prometheus namespace applied to every metric
	// declared in this file.
	metricsNamespace = "quilibrium"

	// subsystem is the Prometheus subsystem applied to every metric declared
	// in this file.
	subsystem = "event_distributor"
)
// Event processing metrics.
var (
	// eventsProcessedTotal counts events processed by the distributor.
	//
	// Labels:
	//   - distributor_type: "global" or "app"
	//   - event_type: see getEventTypeString
	eventsProcessedTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "events_processed_total",
			Help:      "Total number of events processed by the distributor",
		},
		[]string{"distributor_type", "event_type"},
	)

	// eventProcessingDuration observes, in seconds, the time taken to process
	// and broadcast an event. It uses the default Prometheus buckets.
	eventProcessingDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "event_processing_duration_seconds",
			Help:      "Time taken to process and broadcast an event",
			Buckets:   prometheus.DefBuckets,
		},
		[]string{"distributor_type"},
	)
)

// Subscriber metrics.
var (
	// subscribersCount tracks the current number of active subscribers.
	subscribersCount = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "subscribers_count",
			Help:      "Current number of active subscribers",
		},
		[]string{"distributor_type"},
	)

	// subscriptionsTotal counts subscriptions created.
	subscriptionsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "subscriptions_total",
			Help:      "Total number of subscriptions created",
		},
		[]string{"distributor_type"},
	)

	// unsubscriptionsTotal counts unsubscriptions.
	unsubscriptionsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "unsubscriptions_total",
			Help:      "Total number of unsubscriptions",
		},
		[]string{"distributor_type"},
	)
)

// Broadcast metrics.
var (
	// broadcastsTotal counts event broadcasts, split by distributor and event
	// type.
	broadcastsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "broadcasts_total",
			Help:      "Total number of event broadcasts",
		},
		[]string{"distributor_type", "event_type"},
	)

	// eventsDroppedTotal counts events dropped because a subscriber's channel
	// was full.
	//
	// NOTE(review): subscriber_id creates one time series per subscriber; if
	// subscriber IDs are not bounded or reused, the series count can grow
	// without limit. Confirm against the call sites before relying on it.
	eventsDroppedTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "events_dropped_total",
			Help:      "Total number of events dropped due to full subscriber channel",
		},
		[]string{"distributor_type", "event_type", "subscriber_id"},
	)

	// broadcastDuration observes, in seconds, the time taken to broadcast an
	// event to all subscribers. It uses the default Prometheus buckets.
	broadcastDuration = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "broadcast_duration_seconds",
			Help:      "Time taken to broadcast an event to all subscribers",
			Buckets:   prometheus.DefBuckets,
		},
		[]string{"distributor_type"},
	)
)

// Lifecycle metrics.
var (
	// distributorStartsTotal counts distributor starts.
	distributorStartsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "starts_total",
			Help:      "Total number of distributor starts",
		},
		[]string{"distributor_type"},
	)

	// distributorStopsTotal counts distributor stops.
	distributorStopsTotal = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "stops_total",
			Help:      "Total number of distributor stops",
		},
		[]string{"distributor_type"},
	)

	// distributorUptime reports, in seconds, the time since the distributor
	// was started.
	distributorUptime = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: metricsNamespace,
			Subsystem: subsystem,
			Name:      "uptime_seconds",
			Help:      "Time since the distributor was started",
		},
		[]string{"distributor_type"},
	)
)
// getEventTypeString converts a consensus control event type into the string
// used as the event_type metric label. Event types without an explicit
// mapping yield "unknown".
func getEventTypeString(eventType consensus.ControlEventType) string {
	// Start from the fallback so that only recognised types override it.
	label := "unknown"

	switch eventType {
	// Global events.
	case consensus.ControlEventGlobalNewHead:
		label = "global_new_head"
	case consensus.ControlEventGlobalFork:
		label = "global_fork"
	case consensus.ControlEventGlobalEquivocation:
		label = "global_equivocation"

	// App events.
	case consensus.ControlEventAppNewHead:
		label = "app_new_head"
	case consensus.ControlEventAppFork:
		label = "app_fork"
	case consensus.ControlEventAppEquivocation:
		label = "app_equivocation"

	// Control events.
	case consensus.ControlEventStart:
		label = "start"
	case consensus.ControlEventStop:
		label = "stop"
	case consensus.ControlEventHalt:
		label = "halt"
	case consensus.ControlEventResume:
		label = "resume"
	}

	return label
}