mirror of
https://github.com/QuilibriumNetwork/ceremonyclient.git
synced 2026-02-25 12:27:24 +08:00
additional logging to isolate respawn quirks
This commit is contained in:
parent
13a4fb24be
commit
72dfec697b
@ -744,6 +744,7 @@ func NewAppConsensusEngine(
|
||||
// identify which worker(s) hang during shutdown.
|
||||
namedWorker := func(name string, fn func(lifecycle.SignalerContext, lifecycle.ReadyFunc)) lifecycle.ComponentWorker {
|
||||
return func(ctx lifecycle.SignalerContext, ready lifecycle.ReadyFunc) {
|
||||
engine.logger.Debug("worker starting", zap.String("worker", name))
|
||||
defer engine.logger.Debug("worker stopped", zap.String("worker", name))
|
||||
fn(ctx, ready)
|
||||
}
|
||||
@ -966,6 +967,7 @@ func NewAppConsensusEngine(
|
||||
}
|
||||
|
||||
func (e *AppConsensusEngine) Stop(force bool) <-chan error {
|
||||
e.logger.Info("app engine stopping", zap.Bool("force", force))
|
||||
errChan := make(chan error, 1)
|
||||
|
||||
// First, cancel context to signal all goroutines to stop
|
||||
@ -2415,6 +2417,8 @@ func (e *AppConsensusEngine) startConsensus(
|
||||
e.timeoutAggregator.Start(ctx)
|
||||
<-lifecycle.AllReady(e.voteAggregator, e.timeoutAggregator)
|
||||
e.consensusParticipant.Start(ctx)
|
||||
e.logger.Info("consensus started successfully",
|
||||
zap.String("shard_address", e.appAddressHex))
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@ -184,11 +184,13 @@ func (r *DataWorkerIPCServer) RespawnServer(filter []byte) error {
|
||||
// complete, but those handlers won't stop until the engine context is
|
||||
// cancelled. Reversing the order avoids a deadlock.
|
||||
if r.appConsensusEngine != nil {
|
||||
r.logger.Info("respawning worker: stopping old engine")
|
||||
if r.cancel != nil {
|
||||
r.cancel()
|
||||
}
|
||||
<-r.appConsensusEngine.Stop(false)
|
||||
r.appConsensusEngine = nil
|
||||
r.logger.Info("respawning worker: old engine stopped")
|
||||
}
|
||||
if r.server != nil {
|
||||
r.logger.Info("stopping server for respawn")
|
||||
@ -285,16 +287,26 @@ func (r *DataWorkerIPCServer) RespawnServer(filter []byte) error {
|
||||
return errors.Wrap(err, "respawn server")
|
||||
}
|
||||
|
||||
r.ctx, r.cancel, _ = lifecycle.WithSignallerAndCancel(context.Background())
|
||||
var errCh <-chan error
|
||||
r.ctx, r.cancel, errCh = lifecycle.WithSignallerAndCancel(context.Background())
|
||||
// Capture engine and ctx in local variables to avoid race with subsequent RespawnServer calls
|
||||
engine := r.appConsensusEngine
|
||||
ctx := r.ctx
|
||||
go func() {
|
||||
if err, ok := <-errCh; ok && err != nil {
|
||||
r.logger.Error("app engine fatal error during respawn",
|
||||
zap.Error(err))
|
||||
}
|
||||
}()
|
||||
r.logger.Info("respawning worker: engine created, starting")
|
||||
go func() {
|
||||
if engine == nil {
|
||||
return
|
||||
}
|
||||
if err = engine.Start(ctx); err != nil {
|
||||
r.logger.Error("error while running", zap.Error(err))
|
||||
r.logger.Error("respawning worker: engine start failed", zap.Error(err))
|
||||
} else {
|
||||
r.logger.Info("respawning worker: engine started successfully")
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user