repair tool

This commit is contained in:
Cassandra Heart 2024-10-29 20:58:23 -05:00
parent 6d6189d1ad
commit db5dcd2cfb
No known key found for this signature in database
GPG Key ID: 6352152859385958
4 changed files with 287 additions and 10 deletions

View File

@ -66,4 +66,9 @@ type FrameProver interface {
core uint32,
proof []byte,
) bool
RecalculatePreDuskChallengeProof(
challenge []byte,
core uint32,
increment uint32,
) ([]byte, error)
}

View File

@ -638,3 +638,24 @@ func (w *WesolowskiFrameProver) VerifyPreDuskChallengeProof(
check := vdf.WesolowskiVerify(b, difficulty, [516]byte(proof))
return check
}
// RecalculatePreDuskChallengeProof reproduces the pre-dusk VDF challenge
// proof for the given challenge bytes, core index and increment. The
// difficulty decays linearly from 200000 by increment/4 and is clamped to
// a floor of 25000 (the floor always applies past increment 800000).
// Returns the 516-byte Wesolowski proof output; the error is always nil
// and exists for interface symmetry.
func (w *WesolowskiFrameProver) RecalculatePreDuskChallengeProof(
	challenge []byte,
	core uint32,
	increment uint32,
) ([]byte, error) {
	// Guard before subtracting: computing 200000 - increment/4 directly in
	// uint32 wraps around for increment > 800000; the previous code relied
	// on a post-hoc range check to mask that underflow. This form yields
	// the same difficulty for every increment without wrapping.
	var difficulty uint32 = 25000
	if increment <= 800000 {
		if d := 200000 - increment/4; d > 25000 {
			difficulty = d
		}
	}

	// The VDF instance input binds the proof to the core index and the
	// challenge bytes.
	instanceInput := binary.BigEndian.AppendUint32([]byte{}, core)
	instanceInput = append(instanceInput, challenge...)
	b := sha3.Sum256(instanceInput)

	o := vdf.WesolowskiSolve(b, difficulty)
	output := make([]byte, 516)
	copy(output, o[:])
	return output, nil
}

View File

@ -27,6 +27,7 @@ import (
"golang.org/x/crypto/sha3"
"google.golang.org/protobuf/proto"
"source.quilibrium.com/quilibrium/monorepo/node/protobufs"
"source.quilibrium.com/quilibrium/monorepo/node/store"
"source.quilibrium.com/quilibrium/monorepo/node/utils"
"github.com/cloudflare/circl/sign/ed448"
@ -117,6 +118,11 @@ var (
false,
"runs an integrity check on the store, helpful for confirming backups are not corrupted (defaults to false)",
)
emergencyRepair = flag.Bool(
"emergency-repair",
false,
"performs an attempt at emergency repair. extremely dangerous, take a backup of your store before running.",
)
)
func signatureCheckDefault() bool {
@ -282,6 +288,43 @@ func main() {
panic(err)
}
if *emergencyRepair {
fmt.Println("Emergency Repair Mode")
fmt.Println("WARNING")
fmt.Println("WARNING")
fmt.Println("WARNING")
fmt.Println(
"This operation will try an attempt at repairing your 1.4.21.1 store. " +
"It is not guaranteed to work, and may make things worse. Before you " +
"run this, please take a backup of your store. Proofs generated by " +
"this repair tool will evaluate at single core, and earn less QUIL " +
"for the proofs produced than you would have previously earned with a " +
"valid backup. Do you wish to proceed?",
)
fmt.Println("WARNING")
fmt.Println("WARNING")
fmt.Println("WARNING")
fmt.Printf("Proceed? (Y/N): ")
var response string
_, err := fmt.Scanln(&response)
if err != nil {
fmt.Println("Invalid response, exiting without running repair.")
os.Exit(1)
}
response = strings.ToUpper(strings.TrimSpace(response))
if response == "Y" || response == "YES" {
runEmergencyRepair(nodeConfig)
} else {
fmt.Println(
"Did not receive confirmation, exiting without running repair.",
)
os.Exit(0)
}
}
if *network != 0 {
if nodeConfig.P2P.BootstrapPeers[0] == config.BootstrapPeers[0] {
fmt.Println(
@ -463,6 +506,208 @@ func main() {
node.Stop()
}
// runEmergencyRepair attempts to repair a 1.4.21.1 data proof store by
// scanning for missing increments in the data time proof sequence and
// backfilling each gap with a recalculated single-core proof. It exits
// the process directly: status 1 on any uncorrectable error, status 0
// on success or when no gaps are found.
func runEmergencyRepair(cfg *config.Config) {
	fmt.Println("Starting emergency repair.")
	kzg.Init()
	fmt.Println(
		"Opening pebble database. If you see a invalid chunk error, your " +
			"database is corrupted beyond the abilities of this tool to repair.",
	)
	db := store.NewPebbleDB(cfg.DB)
	defer db.Close()
	fmt.Println("Scanning for gaps in record...")
	logger, err := zap.NewDevelopment()
	if err != nil {
		panic(err)
	}
	pstore := store.NewPebbleDataProofStore(db, logger)
	peerId := getPeerID(cfg.P2P)

	// The latest increment bounds the gap scan below; failure here usually
	// means the config points at the wrong store path.
	increment, _, _, err := pstore.GetLatestDataTimeProof([]byte(peerId))
	if err != nil {
		fmt.Println(
			"Could not find latest proof. Please ensure you are using the correct " +
				"config.yml and the path to the store in the config is correct. (Hint: " +
				"try an absolute path for the store)",
		)
		os.Exit(1)
	}
	fmt.Println(
		"Latest proof found, increment:", increment, " iterating to find gaps...",
	)

	// Collect the increment *preceding* each missing record. Note a gap at
	// increment 0 makes i-1 wrap to 0xFFFFFFFF; that sentinel is handled
	// explicitly in the repair loop below.
	gapStarts := []uint32{}
	for i := uint32(0); i < increment; i++ {
		fmt.Println("Checking increment", i)
		_, _, _, _, err := pstore.GetDataTimeProof(
			[]byte(peerId),
			uint32(i),
		)
		if err != nil {
			// Only ErrNotFound marks a repairable gap; any other error is
			// corruption beyond this tool's ability.
			if !errors.Is(err, store.ErrNotFound) {
				fmt.Println("Uncorrectable error detected: ", err)
				os.Exit(1)
			}
			fmt.Println("Missing record at increment", i, " adding to repair set")
			gapStarts = append(gapStarts, i-1)
		}
	}
	if len(gapStarts) == 0 {
		fmt.Println("No gaps found, quitting.")
		os.Exit(0)
	}

	kprover := qcrypto.NewKZGInclusionProver(logger)
	wprover := qcrypto.NewWesolowskiFrameProver(logger)
	for _, gapPredecessor := range gapStarts {
		prevIndex := -1
		hashes := []byte{}
		previousCommitment := []byte{}
		proofs := [][]byte{}
		commitment := []byte{}

		// Load the proof preceding the gap; its commitment seeds the
		// recalculated challenge input.
		_, _, _, previousOutput, err := pstore.GetDataTimeProof(
			[]byte(peerId),
			gapPredecessor,
		)
		if err != nil {
			// gapPredecessor == 0xFFFFFFFF means the gap starts at
			// increment 0 (i-1 wrapped above), so there is no predecessor
			// record to anchor the repair on.
			if errors.Is(err, store.ErrNotFound) && len(gapStarts) > 1 &&
				gapPredecessor == uint32(0xFFFFFFFF) {
				fmt.Println(
					"Could not load predecessor data time proof, store is severely "+
						"corrupted. Please review the logs above. If you encounter this "+
						"scenario starting from increment 0 -",
					gapStarts[len(gapStarts)-1],
					"create a new 1.4.21.1 store, keeping this config.yml and "+
						"keys.yml, and run the node up to",
					gapStarts[len(gapStarts)-1],
				)
			}
			fmt.Println("Uncorrectable error detected: ", err)
			os.Exit(1)
		}
		_, _, previousCommitment, _ = app.GetOutputs(previousOutput)

		fmt.Println(
			"Missing record at increment", gapPredecessor+1, " repairing...",
		)

		// Rebuild the challenge input (peer id || previous commitment) and
		// solve it at core 0 — repaired proofs evaluate at single core.
		input := []byte{}
		input = append(input, []byte(peerId)...)
		input = append(input, previousCommitment...)
		proof, _ := wprover.RecalculatePreDuskChallengeProof(
			input,
			0,
			gapPredecessor+1,
		)
		proofs = append(proofs, proof)

		// Commit the recalculated proof (parallelism 1, polynomial size
		// 128) and produce the raw KZG proof over the digests.
		hashes, commitment, prevIndex = performDataCommitment(
			kprover,
			proofs,
			1,
			uint64(128),
		)
		p, err := kprover.ProveRaw(
			hashes,
			0,
			uint64(128),
		)
		if err != nil {
			fmt.Println("Error while proving", err, " stopping")
			os.Exit(1)
		}
		output := serializeOutput(
			uint32(prevIndex),
			proofs,
			commitment,
			p,
		)

		txn, err := pstore.NewTransaction()
		if err != nil {
			fmt.Println("Error while preparing transaction", err, " stopping")
			os.Exit(1)
		}
		fmt.Println("Storing repaired proof, increment:", gapPredecessor+1)
		// backfill=true tells the store to skip the increment continuity
		// check and to leave the "latest" pointer untouched while
		// inserting the repaired record.
		err = pstore.PutDataTimeProof(
			txn,
			1,
			[]byte(peerId),
			gapPredecessor+1,
			previousCommitment,
			output,
			true,
		)
		if err != nil {
			fmt.Println("Error while saving proof", err, " stopping")
			os.Exit(1)
		}
		if err := txn.Commit(); err != nil {
			fmt.Println("Error while committing transaction", err, " stopping")
			os.Exit(1)
		}
	}
	fmt.Println("Emergency repair completed successfully.")
	os.Exit(0)
}
func serializeOutput(
previousIndex uint32,
previousOutputs [][]byte,
kzgCommitment []byte,
kzgProof []byte,
) []byte {
serializedOutput := []byte{}
serializedOutput = binary.BigEndian.AppendUint32(
serializedOutput,
previousIndex,
)
serializedOutput = append(serializedOutput, previousOutputs[previousIndex]...)
serializedOutput = append(serializedOutput, kzgCommitment...)
serializedOutput = append(serializedOutput, kzgProof...)
return serializedOutput
}
// performDataCommitment hashes each VDF proof output, commits the
// concatenated digests as a KZG polynomial of the given size, and derives
// a deterministic predecessor index from the commitment. Returns the
// digest bytes, the commitment, and the selected index in [0, parallelism).
func performDataCommitment(
	kprover *qcrypto.KZGInclusionProver,
	proofs [][]byte,
	parallelism int,
	polySize uint64,
) ([]byte, []byte, int) {
	// Derive deterministic digests from the VDF outputs to feed into the
	// KZG commitment.
	digests := []byte{}
	for _, proof := range proofs {
		sum := sha3.Sum512(proof)
		digests = append(digests, sum[:]...)
	}

	commitment, err := kprover.CommitRaw(digests, polySize)
	if err != nil {
		panic(err)
	}

	// Select the predecessor index by reducing the commitment hash modulo
	// the parallelism factor.
	commitmentHash := sha3.Sum256(commitment)
	selector := new(big.Int).SetBytes(commitmentHash[:])
	selector.Mod(selector, big.NewInt(int64(parallelism)))
	return digests, commitment, int(selector.Int64())
}
// dataWorkers holds the data-worker child processes (populated by
// spawnDataWorkers); kept at package level so they can be referenced
// after spawning — presumably for lifecycle management/shutdown, confirm
// against usage elsewhere in this file.
var dataWorkers []*exec.Cmd
func spawnDataWorkers(nodeConfig *config.Config) {

View File

@ -42,6 +42,7 @@ type DataProofStore interface {
increment uint32,
input []byte,
output []byte,
backfill bool,
) error
GetLatestDataTimeProof(peerId []byte) (
increment uint32,
@ -480,6 +481,7 @@ func (p *PebbleDataProofStore) PutDataTimeProof(
increment uint32,
input []byte,
output []byte,
backfill bool,
) error {
// Now, for the assumptions.
// Rewards are calculated based off of a current average rate of growth such
@ -505,16 +507,18 @@ func (p *PebbleDataProofStore) PutDataTimeProof(
return errors.Wrap(err, "put data time proof")
}
if len(prev) != 0 {
priorSum.SetBytes(prev[4:])
prevIncrement := binary.BigEndian.Uint32(prev[:4])
if !backfill {
if len(prev) != 0 {
priorSum.SetBytes(prev[4:])
prevIncrement := binary.BigEndian.Uint32(prev[:4])
if err = closer.Close(); err != nil {
return errors.Wrap(err, "put data time proof")
}
if err = closer.Close(); err != nil {
return errors.Wrap(err, "put data time proof")
}
if prevIncrement != increment-1 {
return errors.Wrap(errors.New("invalid increment"), "put data time proof")
if prevIncrement != increment-1 {
return errors.Wrap(errors.New("invalid increment"), "put data time proof")
}
}
}
@ -536,8 +540,10 @@ func (p *PebbleDataProofStore) PutDataTimeProof(
priorSum.Add(priorSum, reward)
latest = append(latest, priorSum.FillBytes(make([]byte, 32))...)
if err = txn.Set(dataTimeProofLatestKey(peerId), latest); err != nil {
return errors.Wrap(err, "put data time proof")
if !backfill {
if err = txn.Set(dataTimeProofLatestKey(peerId), latest); err != nil {
return errors.Wrap(err, "put data time proof")
}
}
return nil