mirror of
https://github.com/QuilibriumNetwork/ceremonyclient.git
synced 2026-03-01 22:37:27 +08:00
* v2.1.0.2 * restore tweaks to simlibp2p * fix: nil ref on size calc * fix: panic should induce shutdown from event_distributor * fix: friendlier initialization that requires less manual kickstarting for test/devnets * fix: fewer available shards than provers should choose shard length * fix: update stored worker registry, improve logging for debug mode * fix: shut the fuck up, peer log * qol: log value should be snake cased * fix:non-archive snap sync issues * fix: separate X448/Decaf448 signed keys, add onion key to registry * fix: overflow arithmetic on frame number comparison * fix: worker registration should be idempotent if inputs are same, otherwise permit updated records * fix: remove global prover state from size calculation * fix: divide by zero case * fix: eager prover * fix: broadcast listener default * qol: diagnostic data for peer authenticator * fix: master/worker connectivity issue in sparse networks tight coupling of peer and workers can sometimes interfere if mesh is sparse, so give workers a pseudoidentity but publish messages with the proper peer key * fix: reorder steps of join creation * fix: join verify frame source + ensure domain is properly padded (unnecessary but good for consistency) * fix: add delegate to protobuf <-> reified join conversion * fix: preempt prover from planning with no workers * fix: use the unallocated workers to generate a proof * qol: underflow causes join fail in first ten frames on test/devnets * qol: small logging tweaks for easier log correlation in debug mode * qol: use fisher-yates shuffle to ensure prover allocations are evenly distributed when scores are equal * qol: separate decisional logic on post-enrollment confirmation into consensus engine, proposer, and worker manager where relevant, refactor out scoring * reuse shard descriptors for both join planning and confirm/reject decisions * fix: add missing interface method and amend test blossomsub to use new peer id basis * fix: only check allocations if they 
exist * fix: pomw mint proof data needs to be hierarchically under global intrinsic domain * staging temporary state under diagnostics * fix: first phase of distributed lock refactoring * fix: compute intrinsic locking * fix: hypergraph intrinsic locking * fix: token intrinsic locking * fix: update execution engines to support new locking model * fix: adjust tests with new execution shape * fix: weave in lock/unlock semantics to liveness provider * fix lock fallthrough, add missing allocation update * qol: additional logging for diagnostics, also testnet/devnet handling for confirmations * fix: establish grace period on halt scenario to permit recovery * fix: support test/devnet defaults for coverage scenarios * fix: nil ref on consensus halts for non-archive nodes * fix: remove unnecessary prefix from prover ref * add test coverage for fork choice behaviors and replay – once passing, blocker (2) is resolved * fix: no fork replay on repeat for non-archive nodes, snap now behaves correctly * rollup of pre-liveness check lock interactions * ahead of tests, get the protobuf/metrics-related changes out so teams can prepare * add test coverage for distributed lock behaviors – once passing, blocker (3) is resolved * fix: blocker (3) * Dev docs improvements (#445) * Make install deps script more robust * Improve testing instructions * Worker node should stop upon OS SIGINT/SIGTERM signal (#447) * move pebble close to Stop() * move deferred Stop() to Start() * add core id to worker stop log message * create done os signal channel and stop worker upon message to it --------- Co-authored-by: Cassandra Heart <7929478+CassOnMars@users.noreply.github.com> --------- Co-authored-by: Daz <daz_the_corgi@proton.me> Co-authored-by: Black Swan <3999712+blacks1ne@users.noreply.github.com>
429 lines
13 KiB
Go
429 lines
13 KiB
Go
package app
|
|
|
|
import (
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
|
)
|
|
|
|
// metricsNamespace is the value used for the Namespace field of the
// Prometheus metric options declared in this file.
const metricsNamespace = "quilibrium"

// subsystem is the value used for the Subsystem field of the Prometheus
// metric options declared in this file.
const subsystem = "app_consensus"
|
|
|
|
var (
|
|
// Frame processing metrics
|
|
// framesProcessedTotal is a counter vector for frames processed by the app
// consensus engine. Series are keyed by "app_address" and "status"; the
// documented status values are "success", "error", and "invalid".
framesProcessedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "frames_processed_total",
		Help:      "Total number of frames processed by the app consensus engine",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// frameProcessingDuration is a histogram vector of frame processing time in
// seconds, keyed by "app_address". It uses the client library's default
// bucket boundaries.
frameProcessingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "frame_processing_duration_seconds",
		Help:      "Time taken to process a frame",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Frame validation metrics
|
|
// frameValidationTotal is a counter vector for frame validations. Series are
// keyed by "app_address" and "result"; the documented result values are
// "accept", "reject", and "ignore".
frameValidationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "frame_validation_total",
		Help:      "Total number of frame validations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
|
|
// frameValidationDuration is a histogram vector of frame validation time in
// seconds, keyed by "app_address". It uses the client library's default
// bucket boundaries.
frameValidationDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "frame_validation_duration_seconds",
		Help:      "Time taken to validate a frame",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Frame proving metrics
|
|
// frameProvingTotal is a counter vector for frame proving attempts. Series
// are keyed by "app_address" and "status"; the documented status values are
// "success", "error", and "skipped".
frameProvingTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "frame_proving_total",
		Help:      "Total number of frame proving attempts",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// frameProvingDuration is a histogram vector of frame proving time in
// seconds, keyed by "app_address". Instead of the default buckets it uses
// explicit boundaries from 0.1s up to 60s (one minute).
frameProvingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "frame_proving_duration_seconds",
		Help:      "Time taken to prove a frame",
		Buckets:   []float64{0.1, 0.5, 1, 2, 5, 10, 30, 60},
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Frame publishing metrics
|
|
// framePublishingTotal is a counter vector for frame publishing attempts.
// Series are keyed by "app_address" and "status"; the documented status
// values are "success" and "error".
framePublishingTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "frame_publishing_total",
		Help:      "Total number of frame publishing attempts",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// framePublishingDuration is a histogram vector of frame publishing time in
// seconds, keyed by "app_address". It uses the client library's default
// bucket boundaries.
framePublishingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "frame_publishing_duration_seconds",
		Help:      "Time taken to publish a frame",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard liveness check processing metrics
|
|
// livenessCheckProcessedTotal is a counter vector for shard liveness checks
// processed by the app consensus engine. Series are keyed by "app_address"
// and "status"; the documented status values are "success", "error", and
// "invalid".
livenessCheckProcessedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "liveness_check_processed_total",
		Help:      "Total number of shard liveness checks processed by the app consensus engine",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// livenessCheckProcessingDuration is a histogram vector of shard liveness
// check processing time in seconds, keyed by "app_address". It uses the
// client library's default bucket boundaries.
livenessCheckProcessingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "liveness_check_processing_duration_seconds",
		Help:      "Time taken to process a shard liveness check",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard liveness check validation metrics
|
|
// livenessCheckValidationTotal is a counter vector for shard liveness check
// validations. Series are keyed by "app_address" and "result"; the
// documented result values are "accept", "reject", and "ignore".
livenessCheckValidationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "liveness_check_validation_total",
		Help:      "Total number of shard liveness check validations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
|
|
// livenessCheckValidationDuration is a histogram vector of shard liveness
// check validation time in seconds, keyed by "app_address". It uses the
// client library's default bucket boundaries.
livenessCheckValidationDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "liveness_check_validation_duration_seconds",
		Help:      "Time taken to validate a shard liveness check",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard vote processing metrics
|
|
// voteProcessedTotal is a counter vector for shard votes processed by the
// app consensus engine. Series are keyed by "app_address" and "status"; the
// documented status values are "success", "error", and "invalid".
voteProcessedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "vote_processed_total",
		Help:      "Total number of shard votes processed by the app consensus engine",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// voteProcessingDuration is a histogram vector of shard vote processing time
// in seconds, keyed by "app_address". It uses the client library's default
// bucket boundaries.
voteProcessingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "vote_processing_duration_seconds",
		Help:      "Time taken to process a shard vote",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard vote validation metrics
|
|
// voteValidationTotal is a counter vector for shard vote validations. Series
// are keyed by "app_address" and "result"; the documented result values are
// "accept", "reject", and "ignore".
voteValidationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "vote_validation_total",
		Help:      "Total number of shard vote validations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
|
|
// voteValidationDuration is a histogram vector of shard vote validation time
// in seconds, keyed by "app_address". It uses the client library's default
// bucket boundaries.
voteValidationDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "vote_validation_duration_seconds",
		Help:      "Time taken to validate a shard vote",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard confirmation processing metrics
|
|
// confirmationProcessedTotal is a counter vector for shard confirmations
// processed by the app consensus engine. Series are keyed by "app_address"
// and "status"; the documented status values are "success", "error", and
// "invalid".
confirmationProcessedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "confirmation_processed_total",
		Help:      "Total number of shard confirmations processed by the app consensus engine",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// confirmationProcessingDuration is a histogram vector of shard confirmation
// processing time in seconds, keyed by "app_address". It uses the client
// library's default bucket boundaries.
confirmationProcessingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "confirmation_processing_duration_seconds",
		Help:      "Time taken to process a shard confirmation",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard confirmation validation metrics
|
|
// confirmationValidationTotal is a counter vector for shard confirmation
// validations. Series are keyed by "app_address" and "result"; the
// documented result values are "accept", "reject", and "ignore".
confirmationValidationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "confirmation_validation_total",
		Help:      "Total number of shard confirmation validations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
|
|
// confirmationValidationDuration is a histogram vector of shard confirmation
// validation time in seconds, keyed by "app_address". It uses the client
// library's default bucket boundaries.
confirmationValidationDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "confirmation_validation_duration_seconds",
		Help:      "Time taken to validate a shard confirmation",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard proposal processing metrics
|
|
// proposalProcessedTotal is a counter vector for shard proposals processed
// by the app consensus engine. Series are keyed by "app_address" and
// "status"; the documented status values are "success", "error", and
// "invalid".
proposalProcessedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "proposal_processed_total",
		Help:      "Total number of shard proposals processed by the app consensus engine",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"status",
	},
)
|
|
|
|
// proposalProcessingDuration is a histogram vector of shard proposal
// processing time in seconds, keyed by "app_address". It uses the client
// library's default bucket boundaries.
proposalProcessingDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "proposal_processing_duration_seconds",
		Help:      "Time taken to process a shard proposal",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Shard proposal validation metrics
|
|
// proposalValidationTotal is a counter vector for shard proposal
// validations. Series are keyed by "app_address" and "result"; the
// documented result values are "accept", "reject", and "ignore".
proposalValidationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "proposal_validation_total",
		Help:      "Total number of shard proposal validations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
|
|
// proposalValidationDuration is a histogram vector of shard proposal
// validation time in seconds, keyed by "app_address". It uses the client
// library's default bucket boundaries.
proposalValidationDuration = promauto.NewHistogramVec(
	prometheus.HistogramOpts{
		Name:      "proposal_validation_duration_seconds",
		Help:      "Time taken to validate a shard proposal",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Global frame processing metrics
|
|
// globalFramesProcessedTotal is a counter vector for global frames processed
// by the app consensus engine. Its only label is "status"; the documented
// status values are "success", "error", and "invalid".
//
// The help text says "global frames" explicitly: it previously duplicated
// the help of frames_processed_total word for word, which made the two
// metrics indistinguishable in the exposition's HELP lines.
globalFramesProcessedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: metricsNamespace,
		Subsystem: subsystem,
		Name:      "global_frames_processed_total",
		Help:      "Total number of global frames processed by the app consensus engine",
	},
	[]string{"status"},
)
|
|
|
|
// globalFrameProcessingDuration is an unlabelled histogram of global frame
// processing time in seconds. It uses the client library's default bucket
// boundaries.
globalFrameProcessingDuration = promauto.NewHistogram(
	prometheus.HistogramOpts{
		Name:      "global_frame_processing_duration_seconds",
		Help:      "Time taken to process a global frame",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
)
|
|
|
|
// Global frame validation metrics
|
|
// globalFrameValidationTotal is a counter vector for global frame
// validations. Its only label is "result"; the documented result values are
// "accept", "reject", and "ignore".
globalFrameValidationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "global_frame_validation_total",
		Help:      "Total number of global frame validations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"result",
	},
)
|
|
|
|
// globalFrameValidationDuration is an unlabelled histogram of global frame
// validation time in seconds. It uses the client library's default bucket
// boundaries.
globalFrameValidationDuration = promauto.NewHistogram(
	prometheus.HistogramOpts{
		Name:      "global_frame_validation_duration_seconds",
		Help:      "Time taken to validate a global frame",
		Buckets:   prometheus.DefBuckets,
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
)
|
|
|
|
// Transaction collection metrics
|
|
// transactionsCollectedTotal is a counter vector for collected
// transactions, keyed by "app_address".
transactionsCollectedTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "transactions_collected_total",
		Help:      "Total number of transactions collected",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// pendingMessagesCount is a gauge vector holding the current number of
// pending messages, keyed by "app_address".
pendingMessagesCount = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Name:      "pending_messages_count",
		Help:      "Current number of pending messages",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Executor metrics
|
|
// executorsRegistered is a gauge vector holding the current number of
// registered executors, keyed by "app_address".
executorsRegistered = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Name:      "executors_registered",
		Help:      "Current number of registered executors",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// executorRegistrationTotal is a counter vector for executor registration
// events. Series are keyed by "app_address" and "action"; the documented
// action values are "register" and "unregister".
executorRegistrationTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "executor_registration_total",
		Help:      "Total number of executor registrations",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"action",
	},
)
|
|
|
|
// Sync status metrics
|
|
// syncStatusCheck is a counter vector for sync status checks. Series are
// keyed by "app_address" and "result"; the documented result values are
// "synced" and "syncing".
syncStatusCheck = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "sync_status_check_total",
		Help:      "Total number of sync status checks",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
|
|
// Engine state metrics
|
|
// engineState is a gauge vector exposing the app consensus engine's current
// state as a number, keyed by "app_address". The help text carries the
// number-to-state mapping (0=stopped through 7=stopping).
engineState = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Name:      "engine_state",
		Help:      "Current state of the app consensus engine (0=stopped, 1=starting, 2=loading, 3=collecting, 4=proving, 5=publishing, 6=verifying, 7=stopping)",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Difficulty metrics
|
|
// currentDifficulty is a gauge vector holding the current difficulty value
// for the app consensus, keyed by "app_address".
currentDifficulty = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Name:      "current_difficulty",
		Help:      "Current difficulty value for the app consensus",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Time since last proven frame
|
|
// timeSinceLastProvenFrame is a gauge vector holding the number of seconds
// since the last frame was proven, keyed by "app_address".
timeSinceLastProvenFrame = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Name:      "time_since_last_proven_frame_seconds",
		Help:      "Time in seconds since the last frame was proven",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Current frame metrics
|
|
// currentFrameNumber is a gauge vector holding the frame number currently
// being processed, keyed by "app_address".
currentFrameNumber = promauto.NewGaugeVec(
	prometheus.GaugeOpts{
		Name:      "current_frame_number",
		Help:      "Current frame number being processed",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
	},
)
|
|
|
|
// Prover key lookup metrics
|
|
// proverKeyLookupTotal is a counter vector for prover key lookups. Series
// are keyed by "app_address" and "result"; the documented result values are
// "found", "not_found", and "error".
proverKeyLookupTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Name:      "prover_key_lookup_total",
		Help:      "Total number of prover key lookups",
		Subsystem: subsystem,
		Namespace: metricsNamespace,
	},
	[]string{
		"app_address",
		"result",
	},
)
|
|
)
|