From db5dcd2cfb83c8b792e1bee25333fd4b04b49348 Mon Sep 17 00:00:00 2001
From: Cassandra Heart
Date: Tue, 29 Oct 2024 20:58:23 -0500
Subject: [PATCH] repair tool

---
 node/crypto/frame_prover.go            |   8 +
 node/crypto/wesolowski_frame_prover.go |  27 +++
 node/main.go                           | 268 +++++++++++++++++++++++++
 node/store/data_proof.go               |  18 +-
 4 files changed, 317 insertions(+), 4 deletions(-)

diff --git a/node/crypto/frame_prover.go b/node/crypto/frame_prover.go
index 4cae2ce..95364cc 100644
--- a/node/crypto/frame_prover.go
+++ b/node/crypto/frame_prover.go
@@ -66,4 +66,12 @@ type FrameProver interface {
 		core uint32,
 		proof []byte,
 	) bool
+	// RecalculatePreDuskChallengeProof recomputes the challenge proof for the
+	// given challenge, core and increment, returning the raw proof bytes or
+	// an error.
+	RecalculatePreDuskChallengeProof(
+		challenge []byte,
+		core uint32,
+		increment uint32,
+	) ([]byte, error)
 }
diff --git a/node/crypto/wesolowski_frame_prover.go b/node/crypto/wesolowski_frame_prover.go
index bc2e539..4ff29f0 100644
--- a/node/crypto/wesolowski_frame_prover.go
+++ b/node/crypto/wesolowski_frame_prover.go
@@ -638,3 +638,30 @@ func (w *WesolowskiFrameProver) VerifyPreDuskChallengeProof(
 	check := vdf.WesolowskiVerify(b, difficulty, [516]byte(proof))
 	return check
 }
+
+// RecalculatePreDuskChallengeProof re-solves the VDF for the given challenge
+// and core at the difficulty derived from increment, and returns the solver
+// output copied into a 516-byte slice. The returned error is always nil.
+func (w *WesolowskiFrameProver) RecalculatePreDuskChallengeProof(
+	challenge []byte,
+	core uint32,
+	increment uint32,
+) ([]byte, error) {
+	// Difficulty drops by one for every four increments with a floor of 25000;
+	// the increment bound covers the range where the subtraction would wrap.
+	difficulty := 200000 - (increment / 4)
+	if difficulty < 25000 || increment > 800000 {
+		difficulty = 25000
+	}
+
+	// The solver input is the SHA3-256 hash of big-endian core || challenge.
+	instanceInput := binary.BigEndian.AppendUint32([]byte{}, core)
+	instanceInput = append(instanceInput, challenge...)
+	b := sha3.Sum256(instanceInput)
+	o := vdf.WesolowskiSolve(b, uint32(difficulty))
+
+	output := make([]byte, 516)
+	copy(output[:], o[:])
+
+	return output, nil
+}
diff --git a/node/main.go b/node/main.go
index c330c67..8996719 100644
--- a/node/main.go
+++ b/node/main.go
@@ -27,6 +27,7 @@ import (
 	"golang.org/x/crypto/sha3"
 	"google.golang.org/protobuf/proto"
 	"source.quilibrium.com/quilibrium/monorepo/node/protobufs"
+	"source.quilibrium.com/quilibrium/monorepo/node/store"
 	"source.quilibrium.com/quilibrium/monorepo/node/utils"
 
 	"github.com/cloudflare/circl/sign/ed448"
@@ -117,6 +118,11 @@ var (
 		false,
 		"runs an integrity check on the store, helpful for confirming backups are not corrupted (defaults to false)",
 	)
+	emergencyRepair = flag.Bool(
+		"emergency-repair",
+		false,
+		"performs an attempt at emergency repair. extremely dangerous, take a backup of your store before running.",
+	)
 )
 
 func signatureCheckDefault() bool {
@@ -282,6 +288,43 @@ func main() {
 		panic(err)
 	}
 
+	if *emergencyRepair {
+		fmt.Println("Emergency Repair Mode")
+		fmt.Println("WARNING")
+		fmt.Println("WARNING")
+		fmt.Println("WARNING")
+		fmt.Println(
+			"This operation will try an attempt at repairing your 1.4.21.1 store. " +
+				"It is not guaranteed to work, and may make things worse. Before you " +
+				"run this, please take a backup of your store. Proofs generated by " +
+				"this repair tool will evaluate at single core, and earn less QUIL " +
+				"for the proofs produced than you would have previously earned with a " +
+				"valid backup. Do you wish to proceed?",
+		)
+		fmt.Println("WARNING")
+		fmt.Println("WARNING")
+		fmt.Println("WARNING")
+
+		fmt.Printf("Proceed? (Y/N): ")
+
+		var response string
+		_, err := fmt.Scanln(&response)
+		if err != nil {
+			fmt.Println("Invalid response, exiting without running repair.")
+			os.Exit(1)
+		}
+
+		response = strings.ToUpper(strings.TrimSpace(response))
+		if response == "Y" || response == "YES" {
+			runEmergencyRepair(nodeConfig)
+		} else {
+			fmt.Println(
+				"Did not receive confirmation, exiting without running repair.",
+			)
+			os.Exit(0)
+		}
+	}
+
 	if *network != 0 {
 		if nodeConfig.P2P.BootstrapPeers[0] == config.BootstrapPeers[0] {
 			fmt.Println(
@@ -463,6 +506,231 @@ func main() {
 	node.Stop()
 }
 
+// runEmergencyRepair scans the data time proof store for missing increments
+// below the latest recorded proof and regenerates each missing record as a
+// single-core proof. It always terminates the process: exit code 0 when no
+// gaps exist or every gap was repaired, exit code 1 on any failure.
+func runEmergencyRepair(cfg *config.Config) {
+	fmt.Println("Starting emergency repair.")
+	kzg.Init()
+	fmt.Println(
+		"Opening pebble database. If you see a invalid chunk error, your " +
+			"database is corrupted beyond the abilities of this tool to repair.",
+	)
+
+	db := store.NewPebbleDB(cfg.DB)
+	// Runs only on panic; every other path leaves through exit below.
+	defer db.Close()
+
+	// os.Exit does not run deferred calls, so close the database explicitly
+	// before terminating.
+	exit := func(code int) {
+		db.Close()
+		os.Exit(code)
+	}
+
+	fmt.Println("Scanning for gaps in record...")
+
+	logger, err := zap.NewDevelopment()
+	if err != nil {
+		panic(err)
+	}
+
+	pstore := store.NewPebbleDataProofStore(db, logger)
+	peerId := getPeerID(cfg.P2P)
+
+	increment, _, _, err := pstore.GetLatestDataTimeProof([]byte(peerId))
+	if err != nil {
+		fmt.Println(
+			"Could not find latest proof. Please ensure you are using the correct " +
+				"config.yml and the path to the store in the config is correct. (Hint: " +
+				"try an absolute path for the store)",
+		)
+		exit(1)
+	}
+
+	fmt.Println(
+		"Latest proof found, increment:", increment, " – iterating to find gaps...",
+	)
+
+	// Each entry is the increment just before a missing record. A missing
+	// increment 0 wraps to 0xFFFFFFFF, which the repair loop checks for.
+	gapStarts := []uint32{}
+
+	for i := uint32(0); i < increment; i++ {
+		fmt.Println("Checking increment", i)
+		_, _, _, _, err := pstore.GetDataTimeProof([]byte(peerId), i)
+		if err == nil {
+			continue
+		}
+
+		if !errors.Is(err, store.ErrNotFound) {
+			fmt.Println("Uncorrectable error detected: ", err)
+			exit(1)
+		}
+
+		fmt.Println("Missing record at increment", i, " – adding to repair set")
+		gapStarts = append(gapStarts, i-1)
+	}
+
+	if len(gapStarts) == 0 {
+		fmt.Println("No gaps found, quitting.")
+		exit(0)
+	}
+
+	kprover := qcrypto.NewKZGInclusionProver(logger)
+	wprover := qcrypto.NewWesolowskiFrameProver(logger)
+
+	for _, gapPredecessor := range gapStarts {
+		_, _, _, previousOutput, err := pstore.GetDataTimeProof(
+			[]byte(peerId),
+			gapPredecessor,
+		)
+		if err != nil {
+			if errors.Is(err, store.ErrNotFound) && len(gapStarts) > 1 &&
+				gapPredecessor == uint32(0xFFFFFFFF) {
+				fmt.Println(
+					"Could not load predecessor data time proof, store is severely "+
+						"corrupted. Please review the logs above. If you encounter this "+
+						"scenario starting from increment 0 -",
+					gapStarts[len(gapStarts)-1],
+					"create a new 1.4.21.1 store, keeping this config.yml and "+
+						"keys.yml, and run the node up to",
+					gapStarts[len(gapStarts)-1],
+				)
+			}
+			fmt.Println("Uncorrectable error detected: ", err)
+			exit(1)
+		}
+		_, _, previousCommitment, _ := app.GetOutputs(previousOutput)
+
+		fmt.Println(
+			"Missing record at increment", gapPredecessor+1, "– repairing...",
+		)
+
+		input := append([]byte(peerId), previousCommitment...)
+		proof, err := wprover.RecalculatePreDuskChallengeProof(
+			input,
+			0,
+			gapPredecessor+1,
+		)
+		if err != nil {
+			fmt.Println("Error while recalculating proof", err, "– stopping")
+			exit(1)
+		}
+		proofs := [][]byte{proof}
+
+		hashes, commitment, prevIndex := performDataCommitment(
+			kprover,
+			proofs,
+			1,
+			uint64(128),
+		)
+
+		p, err := kprover.ProveRaw(
+			hashes,
+			0,
+			uint64(128),
+		)
+		if err != nil {
+			fmt.Println("Error while proving", err, "– stopping")
+			exit(1)
+		}
+
+		output := serializeOutput(
+			uint32(prevIndex),
+			proofs,
+			commitment,
+			p,
+		)
+
+		txn, err := pstore.NewTransaction()
+		if err != nil {
+			fmt.Println("Error while preparing transaction", err, "– stopping")
+			exit(1)
+		}
+
+		fmt.Println("Storing repaired proof, increment:", gapPredecessor+1)
+		err = pstore.PutDataTimeProof(
+			txn,
+			1,
+			[]byte(peerId),
+			gapPredecessor+1,
+			previousCommitment,
+			output,
+			true,
+		)
+		if err != nil {
+			fmt.Println("Error while saving proof", err, "– stopping")
+			exit(1)
+		}
+
+		if err := txn.Commit(); err != nil {
+			fmt.Println("Error while committing transaction", err, "– stopping")
+			exit(1)
+		}
+	}
+
+	fmt.Println("Emergency repair completed successfully.")
+	exit(0)
+}
+
+// serializeOutput encodes a proof record as the big-endian previousIndex
+// followed by previousOutputs[previousIndex], kzgCommitment and kzgProof.
+// previousIndex must be a valid index into previousOutputs.
+func serializeOutput(
+	previousIndex uint32,
+	previousOutputs [][]byte,
+	kzgCommitment []byte,
+	kzgProof []byte,
+) []byte {
+	selected := previousOutputs[previousIndex]
+	serializedOutput := make(
+		[]byte,
+		0,
+		4+len(selected)+len(kzgCommitment)+len(kzgProof),
+	)
+	serializedOutput = binary.BigEndian.AppendUint32(
+		serializedOutput,
+		previousIndex,
+	)
+	serializedOutput = append(serializedOutput, selected...)
+	serializedOutput = append(serializedOutput, kzgCommitment...)
+	serializedOutput = append(serializedOutput, kzgProof...)
+	return serializedOutput
+}
+
+// performDataCommitment hashes each proof with SHA3-512 and commits to the
+// concatenated hashes via kprover.CommitRaw. It returns the concatenated
+// hashes, the commitment, and an index derived from the SHA3-256 hash of the
+// commitment reduced modulo parallelism (which must be positive). It panics
+// if the commitment cannot be produced.
+func performDataCommitment(
+	kprover *qcrypto.KZGInclusionProver,
+	proofs [][]byte,
+	parallelism int,
+	polySize uint64,
+) ([]byte, []byte, int) {
+	// Take the VDF outputs and generate some deterministic outputs to feed
+	// into a KZG commitment:
+	output := make([]byte, 0, len(proofs)*64)
+	for _, proof := range proofs {
+		h := sha3.Sum512(proof)
+		output = append(output, h[:]...)
+	}
+	nextInput, err := kprover.CommitRaw(output, polySize)
+	if err != nil {
+		panic(err)
+	}
+	inputHash := sha3.Sum256(nextInput)
+	inputHashBI := big.NewInt(0).SetBytes(inputHash[:])
+	prevIndex := int(inputHashBI.Mod(
+		inputHashBI,
+		big.NewInt(int64(parallelism)),
+	).Int64())
+	return output, nextInput, prevIndex
+}
+
 var dataWorkers []*exec.Cmd
 
 func spawnDataWorkers(nodeConfig *config.Config) {
diff --git a/node/store/data_proof.go b/node/store/data_proof.go
index b41713e..d749335 100644
--- a/node/store/data_proof.go
+++ b/node/store/data_proof.go
@@ -42,6 +42,7 @@ type DataProofStore interface {
 		increment uint32,
 		input []byte,
 		output []byte,
+		backfill bool,
 	) error
 	GetLatestDataTimeProof(peerId []byte) (
 		increment uint32,
@@ -480,6 +481,7 @@ func (p *PebbleDataProofStore) PutDataTimeProof(
 	increment uint32,
 	input []byte,
 	output []byte,
+	backfill bool,
 ) error {
 	// Now, for the assumptions.
 	// Rewards are calculated based off of a current average rate of growth such
@@ -505,16 +507,22 @@ func (p *PebbleDataProofStore) PutDataTimeProof(
 		return errors.Wrap(err, "put data time proof")
 	}
 
 	if len(prev) != 0 {
-		priorSum.SetBytes(prev[4:])
+		// Backfilled proofs never update the latest record, so the prior sum
+		// is only loaded for regular writes.
+		if !backfill {
+			priorSum.SetBytes(prev[4:])
+		}
 		prevIncrement := binary.BigEndian.Uint32(prev[:4])
 
 		if err = closer.Close(); err != nil {
 			return errors.Wrap(err, "put data time proof")
 		}
 
-		if prevIncrement != increment-1 {
+		// The closer above is released in both modes; only the sequential
+		// increment check is skipped when backfilling a gap.
+		if !backfill && prevIncrement != increment-1 {
 			return errors.Wrap(errors.New("invalid increment"), "put data time proof")
 		}
 	}
 
@@ -536,8 +544,10 @@ func (p *PebbleDataProofStore) PutDataTimeProof(
 	priorSum.Add(priorSum, reward)
 	latest = append(latest, priorSum.FillBytes(make([]byte, 32))...)
 
-	if err = txn.Set(dataTimeProofLatestKey(peerId), latest); err != nil {
-		return errors.Wrap(err, "put data time proof")
+	if !backfill {
+		if err = txn.Set(dataTimeProofLatestKey(peerId), latest); err != nil {
+			return errors.Wrap(err, "put data time proof")
+		}
 	}
 
 	return nil