diff --git a/.github/legacy/Dockerfile.goipfs-stub b/.github/legacy/Dockerfile.goipfs-stub new file mode 100644 index 000000000..74bed264d --- /dev/null +++ b/.github/legacy/Dockerfile.goipfs-stub @@ -0,0 +1,26 @@ +# syntax=docker/dockerfile:1 +# Stub Dockerfile for the deprecated 'ipfs/go-ipfs' image name. +# This image redirects users to the new 'ipfs/kubo' name. +FROM busybox:stable-glibc + +# Copy stub entrypoint that displays deprecation message +COPY .github/legacy/goipfs_stub.sh /usr/local/bin/ipfs + +# Make it executable +RUN chmod +x /usr/local/bin/ipfs + +# Use the same ports as the real image for compatibility +EXPOSE 4001 4001/udp 5001 8080 8081 + +# Create ipfs user for consistency +ENV IPFS_PATH=/data/ipfs +RUN mkdir -p $IPFS_PATH \ + && adduser -D -h $IPFS_PATH -u 1000 -G users ipfs \ + && chown ipfs:users $IPFS_PATH + +# Run as ipfs user +USER ipfs + +# The stub script will run and exit with an error message +ENTRYPOINT ["/usr/local/bin/ipfs"] +CMD ["daemon"] diff --git a/.github/legacy/goipfs_stub.sh b/.github/legacy/goipfs_stub.sh new file mode 100755 index 000000000..15185ce7e --- /dev/null +++ b/.github/legacy/goipfs_stub.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# Stub script for the deprecated 'ipfs/go-ipfs' Docker image. +# This informs users to switch to 'ipfs/kubo'. + +cat >&2 <<'EOF' +ERROR: The name 'go-ipfs' is no longer used. + +Please update your Docker scripts to use 'ipfs/kubo' instead of 'ipfs/go-ipfs'. + +For example: + docker pull ipfs/kubo:release + +More information: + - https://github.com/ipfs/kubo#docker + - https://hub.docker.com/r/ipfs/kubo + - https://docs.ipfs.tech/install/run-ipfs-inside-docker/ + +EOF + +exit 1 diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 4564c060e..f2566d88a 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -39,7 +39,8 @@ jobs: timeout-minutes: 15 env: IMAGE_NAME: ipfs/kubo - LEGACY_IMAGE_NAME: ipfs/go-ipfs + outputs: + tags: ${{ steps.tags.outputs.value }} steps: - name: Check out the repo uses: actions/checkout@v5 @@ -140,3 +141,52 @@ jobs: cache-to: | type=gha,mode=max type=registry,ref=${{ env.IMAGE_NAME }}:buildcache,mode=max + + # Build and push stub image to the legacy ipfs/go-ipfs name + # This redirects users to use ipfs/kubo instead + legacy-name: + needs: docker-hub + if: github.repository == 'ipfs/kubo' || github.event_name == 'workflow_dispatch' + name: Push stub to legacy ipfs/go-ipfs name + runs-on: ubuntu-latest + timeout-minutes: 5 + env: + LEGACY_IMAGE_NAME: ipfs/go-ipfs + steps: + - name: Check out the repo + uses: actions/checkout@v5 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ vars.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Convert tags to legacy image name + id: legacy_tags + run: | + TAGS="${{ github.event.inputs.tags || needs.docker-hub.outputs.tags }}" + if ! 
echo "$TAGS" | grep -q "kubo"; then + echo "ERROR: Tags must contain kubo image name" + exit 1 + fi + echo "value<> $GITHUB_OUTPUT + echo "$TAGS" | sed "s|ipfs/kubo|$LEGACY_IMAGE_NAME|g" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + shell: bash + + - if: github.event_name != 'workflow_dispatch' || github.event.inputs.push == 'true' + name: Build and push legacy stub image + uses: docker/build-push-action@v6 + with: + platforms: linux/amd64,linux/arm/v7,linux/arm64/v8 + context: . + push: true + file: ./.github/legacy/Dockerfile.goipfs-stub + tags: ${{ steps.legacy_tags.outputs.value }} diff --git a/.github/workflows/gateway-conformance.yml b/.github/workflows/gateway-conformance.yml index 3518afad7..fcb982cca 100644 --- a/.github/workflows/gateway-conformance.yml +++ b/.github/workflows/gateway-conformance.yml @@ -109,13 +109,13 @@ jobs: run: cat output.md >> $GITHUB_STEP_SUMMARY - name: Upload HTML report if: failure() || success() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: gateway-conformance.html path: output.html - name: Upload JSON report if: failure() || success() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: gateway-conformance.json path: output.json @@ -214,13 +214,13 @@ jobs: run: cat output.md >> $GITHUB_STEP_SUMMARY - name: Upload HTML report if: failure() || success() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: gateway-conformance-libp2p.html path: output.html - name: Upload JSON report if: failure() || success() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: gateway-conformance-libp2p.json path: output.json diff --git a/.github/workflows/gotest.yml b/.github/workflows/gotest.yml index f08fcaac4..225c9621d 100644 --- a/.github/workflows/gotest.yml +++ b/.github/workflows/gotest.yml @@ -78,7 +78,7 @@ jobs: output: test/unit/gotest.junit.xml if: failure() || success() - name: Archive the JUnit XML report - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: unit path: test/unit/gotest.junit.xml @@ -91,7 +91,7 @@ jobs: output: test/unit/gotest.html if: failure() || success() - name: Archive the HTML report - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: html path: test/unit/gotest.html diff --git a/.github/workflows/interop.yml b/.github/workflows/interop.yml index d0f3b9a79..ccad87ecd 100644 --- a/.github/workflows/interop.yml +++ b/.github/workflows/interop.yml @@ -37,7 +37,7 @@ jobs: with: go-version-file: 'go.mod' - run: make build - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@v5 with: name: kubo path: cmd/ipfs/ipfs @@ -49,10 +49,10 @@ jobs: run: shell: bash steps: - - uses: actions/setup-node@v5 + - uses: actions/setup-node@v6 with: node-version: lts/* - - uses: actions/download-artifact@v5 + - uses: actions/download-artifact@v6 with: name: kubo path: cmd/ipfs @@ -84,10 +84,10 @@ jobs: run: shell: bash steps: - - uses: actions/setup-node@v5 + - uses: actions/setup-node@v6 with: node-version: 20.x - - uses: actions/download-artifact@v5 + - uses: actions/download-artifact@v6 with: name: kubo path: cmd/ipfs diff --git a/.github/workflows/sharness.yml b/.github/workflows/sharness.yml index 8c0c39130..1deb2ffd0 100644 --- a/.github/workflows/sharness.yml +++ b/.github/workflows/sharness.yml @@ -88,7 +88,7 @@ jobs: destination: sharness.html - name: Upload one-page HTML report if: github.repository != 'ipfs/kubo' && (failure() || success()) - 
uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: sharness.html path: kubo/test/sharness/test-results/sharness.html @@ -108,7 +108,7 @@ jobs: destination: sharness-html/ - name: Upload full HTML report if: github.repository != 'ipfs/kubo' && (failure() || success()) - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: sharness-html path: kubo/test/sharness/test-results/sharness-html diff --git a/.github/workflows/sync-release-assets.yml b/.github/workflows/sync-release-assets.yml index c8ba7338c..33869f11d 100644 --- a/.github/workflows/sync-release-assets.yml +++ b/.github/workflows/sync-release-assets.yml @@ -22,7 +22,7 @@ jobs: - uses: ipfs/start-ipfs-daemon-action@v1 with: args: --init --init-profile=flatfs,server --enable-gc=false - - uses: actions/setup-node@v5 + - uses: actions/setup-node@v6 with: node-version: 14 - name: Sync the latest 5 github releases diff --git a/.github/workflows/test-migrations.yml b/.github/workflows/test-migrations.yml index 1def94ff7..c1840daa2 100644 --- a/.github/workflows/test-migrations.yml +++ b/.github/workflows/test-migrations.yml @@ -77,7 +77,7 @@ jobs: - name: Upload test results if: always() - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v5 with: name: ${{ matrix.os }}-test-results path: | diff --git a/CHANGELOG.md b/CHANGELOG.md index eefffc3e2..0db008b1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ # Kubo Changelogs +- [v0.39](docs/changelogs/v0.39.md) - [v0.38](docs/changelogs/v0.38.md) - [v0.37](docs/changelogs/v0.37.md) - [v0.36](docs/changelogs/v0.36.md) diff --git a/README.md b/README.md index b7acab5b5..bd1cf9967 100644 --- a/README.md +++ b/README.md @@ -191,13 +191,13 @@ $ ipfs ls /ipns/dist.ipfs.tech/kubo/$VERSION To download a given build of a version: ```console -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_darwin-386.tar.gz # darwin 32-bit build -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_darwin-amd64.tar.gz # darwin 64-bit build -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_freebsd-amd64.tar.gz # freebsd 64-bit build -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_linux-386.tar.gz # linux 32-bit build -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_linux-amd64.tar.gz # linux 64-bit build -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_linux-arm.tar.gz # linux arm build -$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_windows-amd64.zip # windows 64-bit build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_darwin-amd64.tar.gz # darwin amd64 build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_darwin-arm64.tar.gz # darwin arm64 build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_freebsd-amd64.tar.gz # freebsd amd64 build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_linux-amd64.tar.gz # linux amd64 build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_linux-riscv64.tar.gz # linux riscv64 build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_linux-arm64.tar.gz # linux arm64 build +$ ipfs get /ipns/dist.ipfs.tech/kubo/$VERSION/kubo_$VERSION_windows-amd64.zip # windows amd64 build ``` ### Unofficial Linux packages diff --git a/bin/get-docker-tags.sh b/bin/get-docker-tags.sh index 1c4e184f2..19f218a27 100755 --- a/bin/get-docker-tags.sh +++ b/bin/get-docker-tags.sh @@ -29,12 +29,10 @@ GIT_BRANCH=${3:-$(git symbolic-ref -q --short HEAD || echo "unknown")} 
GIT_TAG=${4:-$(git describe --tags --exact-match 2> /dev/null || echo "")} IMAGE_NAME=${IMAGE_NAME:-ipfs/kubo} -LEGACY_IMAGE_NAME=${LEGACY_IMAGE_NAME:-ipfs/go-ipfs} echoImageName () { local IMAGE_TAG=$1 echo "$IMAGE_NAME:$IMAGE_TAG" - echo "$LEGACY_IMAGE_NAME:$IMAGE_TAG" } if [[ $GIT_TAG =~ ^v[0-9]+\.[0-9]+\.[0-9]+-rc ]]; then diff --git a/config/provide.go b/config/provide.go index 9fc378a32..fd72c0576 100644 --- a/config/provide.go +++ b/config/provide.go @@ -16,9 +16,10 @@ const ( DefaultProvideDHTInterval = 22 * time.Hour // https://github.com/ipfs/kubo/pull/9326 DefaultProvideDHTMaxWorkers = 16 // Unified default for both sweep and legacy providers DefaultProvideDHTSweepEnabled = false + DefaultProvideDHTResumeEnabled = true DefaultProvideDHTDedicatedPeriodicWorkers = 2 DefaultProvideDHTDedicatedBurstWorkers = 1 - DefaultProvideDHTMaxProvideConnsPerWorker = 16 + DefaultProvideDHTMaxProvideConnsPerWorker = 20 DefaultProvideDHTKeystoreBatchSize = 1 << 14 // ~544 KiB per batch (1 multihash = 34 bytes) DefaultProvideDHTOfflineDelay = 2 * time.Hour ) @@ -86,6 +87,12 @@ type ProvideDHT struct { // OfflineDelay sets the delay after which the provider switches from Disconnected to Offline state (sweep mode only). // Default: DefaultProvideDHTOfflineDelay OfflineDelay *OptionalDuration `json:",omitempty"` + + // ResumeEnabled controls whether the provider resumes from its previous state on restart. + // When enabled, the provider persists its reprovide cycle state and provide queue to the datastore, + // and restores them on restart. When disabled, the provider starts fresh on each restart. + // Default: true + ResumeEnabled Flag `json:",omitempty"` } func ParseProvideStrategy(s string) ProvideStrategy { diff --git a/core/commands/provide.go b/core/commands/provide.go index 3cc8b4f3c..ba2be7d7b 100644 --- a/core/commands/provide.go +++ b/core/commands/provide.go @@ -4,33 +4,63 @@ import ( "errors" "fmt" "io" + "strings" "text/tabwriter" "time" + "unicode/utf8" humanize "github.com/dustin/go-humanize" - "github.com/ipfs/boxo/provider" + boxoprovider "github.com/ipfs/boxo/provider" cmds "github.com/ipfs/go-ipfs-cmds" "github.com/ipfs/kubo/core/commands/cmdenv" "github.com/libp2p/go-libp2p-kad-dht/fullrt" + "github.com/libp2p/go-libp2p-kad-dht/provider" + "github.com/libp2p/go-libp2p-kad-dht/provider/buffered" + "github.com/libp2p/go-libp2p-kad-dht/provider/dual" + "github.com/libp2p/go-libp2p-kad-dht/provider/stats" + "github.com/probe-lab/go-libdht/kad/key" "golang.org/x/exp/constraints" ) const ( provideQuietOptionName = "quiet" + provideLanOptionName = "lan" + + provideStatAllOptionName = "all" + provideStatCompactOptionName = "compact" + provideStatNetworkOptionName = "network" + provideStatConnectivityOptionName = "connectivity" + provideStatOperationsOptionName = "operations" + provideStatTimingsOptionName = "timings" + provideStatScheduleOptionName = "schedule" + provideStatQueuesOptionName = "queues" + provideStatWorkersOptionName = "workers" + + // lowWorkerThreshold is the threshold below which worker availability warnings are shown + lowWorkerThreshold = 2 ) var ProvideCmd = &cmds.Command{ Status: cmds.Experimental, Helptext: cmds.HelpText{ - Tagline: "Control providing operations", + Tagline: "Control and monitor content providing", ShortDescription: ` Control providing operations. -NOTE: This command is experimental and not all provide-related commands have -been migrated to this namespace yet. 
For example, 'ipfs routing -provide|reprovide' are still under the routing namespace, 'ipfs stats -reprovide' provides statistics. Additionally, 'ipfs bitswap reprovide' and -'ipfs stats provide' are deprecated. +OVERVIEW: + +The provider system advertises content by publishing provider records, +allowing other nodes to discover which peers have specific content. +Content is reprovided periodically (every Provide.DHT.Interval) +according to Provide.Strategy. + +CONFIGURATION: + +Learn more: https://github.com/ipfs/kubo/blob/master/docs/config.md#provide + +SEE ALSO: + +For ad-hoc one-time provide, see 'ipfs routing provide' `, }, @@ -47,10 +77,18 @@ var provideClearCmd = &cmds.Command{ ShortDescription: ` Clear all CIDs pending to be provided for the first time. -Note: Kubo will automatically clear the queue when it detects a change of -Provide.Strategy upon a restart. For more information about provide -strategies, see: -https://github.com/ipfs/kubo/blob/master/docs/config.md#providestrategy +BEHAVIOR: + +This command removes CIDs from the provide queue that are waiting to be +advertised to the DHT for the first time. It does not affect content that +is already being reprovided on schedule. + +AUTOMATIC CLEARING: + +Kubo will automatically clear the queue when it detects a change of +Provide.Strategy upon a restart. + +Learn: https://github.com/ipfs/kubo/blob/master/docs/config.md#providestrategy `, }, Options: []cmds.Option{ @@ -90,25 +128,108 @@ https://github.com/ipfs/kubo/blob/master/docs/config.md#providestrategy } type provideStats struct { - provider.ReproviderStats - fullRT bool + Sweep *stats.Stats + Legacy *boxoprovider.ReproviderStats + FullRT bool // only used for legacy stats +} + +// extractSweepingProvider extracts a SweepingProvider from the given provider interface. +// It handles unwrapping buffered and dual providers, selecting LAN or WAN as specified. +// Returns nil if the provider is not a sweeping provider type. +func extractSweepingProvider(prov any, useLAN bool) *provider.SweepingProvider { + switch p := prov.(type) { + case *provider.SweepingProvider: + return p + case *dual.SweepingProvider: + if useLAN { + return p.LAN + } + return p.WAN + case *buffered.SweepingProvider: + // Recursively extract from the inner provider + return extractSweepingProvider(p.Provider, useLAN) + default: + return nil + } } var provideStatCmd = &cmds.Command{ Status: cmds.Experimental, Helptext: cmds.HelpText{ - Tagline: "Returns statistics about the node's provider system.", + Tagline: "Show statistics about the provider system", ShortDescription: ` -Returns statistics about the content the node is reproviding every -Provide.DHT.Interval according to Provide.Strategy: -https://github.com/ipfs/kubo/blob/master/docs/config.md#provide +Returns statistics about the node's provider system. -This interface is not stable and may change from release to release. +OVERVIEW: +The provide system advertises content to the DHT by publishing provider +records that map CIDs to your peer ID. These records expire after a fixed +TTL to account for node churn, so content must be reprovided periodically +to stay discoverable. + +Two provider types exist: + +- Sweep provider: Divides the DHT keyspace into regions and systematically + sweeps through them over the reprovide interval. Batches CIDs allocated + to the same DHT servers, reducing lookups from N (one per CID) to a + small static number based on DHT size (~3k for 10k DHT servers). 
Spreads + work evenly over time to prevent resource spikes and ensure announcements + happen just before records expire. + +- Legacy provider: Processes each CID individually with separate DHT + lookups. Attempts to reprovide all content as quickly as possible at the + start of each cycle. Works well for small datasets but struggles with + large collections. + +Learn more: +- Config: https://github.com/ipfs/kubo/blob/master/docs/config.md#provide +- Metrics: https://github.com/ipfs/kubo/blob/master/docs/provide-stats.md + +DEFAULT OUTPUT: + +Shows a brief summary including queue sizes, scheduled items, average record +holders, ongoing/total provides, and worker warnings. + +DETAILED OUTPUT: + +Use --all for detailed statistics with these sections: connectivity, queues, +schedule, timings, network, operations, and workers. Individual sections can +be displayed with their flags (e.g., --network, --operations). Multiple flags +can be combined. + +Use --compact for monitoring-friendly 2-column output (requires --all). + +EXAMPLES: + +Monitor provider statistics in real-time with 2-column layout: + + watch ipfs provide stat --all --compact + +Get statistics in JSON format for programmatic processing: + + ipfs provide stat --enc=json | jq + +NOTES: + +- This interface is experimental and may change between releases +- Legacy provider shows basic stats only (no flags supported) +- "Regions" are keyspace divisions for spreading reprovide work +- For Dual DHT: use --lan for LAN provider stats (default is WAN) `, }, Arguments: []cmds.Argument{}, - Options: []cmds.Option{}, + Options: []cmds.Option{ + cmds.BoolOption(provideLanOptionName, "Show stats for LAN DHT only (for Sweep+Dual DHT only)"), + cmds.BoolOption(provideStatAllOptionName, "a", "Display all provide sweep stats"), + cmds.BoolOption(provideStatCompactOptionName, "Display stats in 2-column layout (requires --all)"), + cmds.BoolOption(provideStatConnectivityOptionName, "Display DHT connectivity status"), + cmds.BoolOption(provideStatNetworkOptionName, "Display network stats (peers, reachability, region size)"), + cmds.BoolOption(provideStatScheduleOptionName, "Display reprovide schedule (CIDs/regions scheduled, next reprovide time)"), + cmds.BoolOption(provideStatTimingsOptionName, "Display timing information (uptime, cycle start, reprovide interval)"), + cmds.BoolOption(provideStatWorkersOptionName, "Display worker pool stats (active/available/queued workers)"), + cmds.BoolOption(provideStatOperationsOptionName, "Display operation stats (ongoing/past provides, rates, errors)"), + cmds.BoolOption(provideStatQueuesOptionName, "Display provide and reprovide queue sizes"), + }, Run: func(req *cmds.Request, res cmds.ResponseEmitter, env cmds.Environment) error { nd, err := cmdenv.GetNode(env) if err != nil { @@ -119,35 +240,272 @@ This interface is not stable and may change from release to release. 
return ErrNotOnline } - provideSys, ok := nd.Provider.(provider.System) - if !ok { - return errors.New("stats not available with experimental sweeping provider (Provide.DHT.SweepEnabled=true)") + lanStats, _ := req.Options[provideLanOptionName].(bool) + + // Handle legacy provider + if legacySys, ok := nd.Provider.(boxoprovider.System); ok { + if lanStats { + return errors.New("LAN stats only available for Sweep provider with Dual DHT") + } + stats, err := legacySys.Stat() + if err != nil { + return err + } + _, fullRT := nd.DHTClient.(*fullrt.FullRT) + return res.Emit(provideStats{Legacy: &stats, FullRT: fullRT}) } - stats, err := provideSys.Stat() - if err != nil { - return err - } - _, fullRT := nd.DHTClient.(*fullrt.FullRT) - - if err := res.Emit(provideStats{stats, fullRT}); err != nil { - return err + // Extract sweeping provider (handles buffered and dual unwrapping) + sweepingProvider := extractSweepingProvider(nd.Provider, lanStats) + if sweepingProvider == nil { + if lanStats { + return errors.New("LAN stats only available for Sweep provider with Dual DHT") + } + return fmt.Errorf("stats not available with current routing system %T", nd.Provider) } - return nil + s := sweepingProvider.Stats() + return res.Emit(provideStats{Sweep: &s}) }, Encoders: cmds.EncoderMap{ cmds.Text: cmds.MakeTypedEncoder(func(req *cmds.Request, w io.Writer, s provideStats) error { wtr := tabwriter.NewWriter(w, 1, 2, 1, ' ', 0) defer wtr.Flush() - fmt.Fprintf(wtr, "TotalReprovides:\t%s\n", humanNumber(s.TotalReprovides)) - fmt.Fprintf(wtr, "AvgReprovideDuration:\t%s\n", humanDuration(s.AvgReprovideDuration)) - fmt.Fprintf(wtr, "LastReprovideDuration:\t%s\n", humanDuration(s.LastReprovideDuration)) - if !s.LastRun.IsZero() { - fmt.Fprintf(wtr, "LastReprovide:\t%s\n", humanTime(s.LastRun)) - if s.fullRT { - fmt.Fprintf(wtr, "NextReprovide:\t%s\n", humanTime(s.LastRun.Add(s.ReprovideInterval))) + all, _ := req.Options[provideStatAllOptionName].(bool) + compact, _ := req.Options[provideStatCompactOptionName].(bool) + connectivity, _ := req.Options[provideStatConnectivityOptionName].(bool) + queues, _ := req.Options[provideStatQueuesOptionName].(bool) + schedule, _ := req.Options[provideStatScheduleOptionName].(bool) + network, _ := req.Options[provideStatNetworkOptionName].(bool) + timings, _ := req.Options[provideStatTimingsOptionName].(bool) + operations, _ := req.Options[provideStatOperationsOptionName].(bool) + workers, _ := req.Options[provideStatWorkersOptionName].(bool) + + flagCount := 0 + for _, enabled := range []bool{all, connectivity, queues, schedule, network, timings, operations, workers} { + if enabled { + flagCount++ + } + } + + if s.Legacy != nil { + if flagCount > 0 { + return errors.New("cannot use flags with legacy provide stats") + } + fmt.Fprintf(wtr, "TotalReprovides:\t%s\n", humanNumber(s.Legacy.TotalReprovides)) + fmt.Fprintf(wtr, "AvgReprovideDuration:\t%s\n", humanDuration(s.Legacy.AvgReprovideDuration)) + fmt.Fprintf(wtr, "LastReprovideDuration:\t%s\n", humanDuration(s.Legacy.LastReprovideDuration)) + if !s.Legacy.LastRun.IsZero() { + fmt.Fprintf(wtr, "LastReprovide:\t%s\n", humanTime(s.Legacy.LastRun)) + if s.FullRT { + fmt.Fprintf(wtr, "NextReprovide:\t%s\n", humanTime(s.Legacy.LastRun.Add(s.Legacy.ReprovideInterval))) + } + } + return nil + } + + if s.Sweep == nil { + return errors.New("no provide stats available") + } + + // Sweep provider stats + if s.Sweep.Closed { + fmt.Fprintf(wtr, "Provider is closed\n") + return nil + } + + if compact && !all { + return 
errors.New("--compact requires --all flag") + } + + brief := flagCount == 0 + showHeadings := flagCount > 1 || all + + compactMode := all && compact + var cols [2][]string + col0MaxWidth := 0 + // formatLine handles both normal and compact output modes: + // - Normal mode: all lines go to cols[0], col parameter is ignored + // - Compact mode: col 0 for left column, col 1 for right column + formatLine := func(col int, format string, a ...any) { + if compactMode { + s := fmt.Sprintf(format, a...) + cols[col] = append(cols[col], s) + if col == 0 { + col0MaxWidth = max(col0MaxWidth, utf8.RuneCountInString(s)) + } + return + } + format = strings.Replace(format, ": ", ":\t", 1) + format = strings.Replace(format, ", ", ",\t", 1) + cols[0] = append(cols[0], fmt.Sprintf(format, a...)) + } + addBlankLine := func(col int) { + if !brief { + formatLine(col, "") + } + } + sectionTitle := func(col int, title string) { + if !brief && showHeadings { + //nolint:govet // dynamic format string is intentional + formatLine(col, title+":") + } + } + + indent := " " + if brief || !showHeadings { + indent = "" + } + + // Connectivity + if all || connectivity || brief && s.Sweep.Connectivity.Status != "online" { + sectionTitle(1, "Connectivity") + since := s.Sweep.Connectivity.Since + if since.IsZero() { + formatLine(1, "%sStatus: %s", indent, s.Sweep.Connectivity.Status) + } else { + formatLine(1, "%sStatus: %s (%s)", indent, s.Sweep.Connectivity.Status, humanTime(since)) + } + addBlankLine(1) + } + + // Queues + if all || queues || brief { + sectionTitle(1, "Queues") + formatLine(1, "%sProvide queue: %s CIDs, %s regions", indent, humanNumber(s.Sweep.Queues.PendingKeyProvides), humanNumber(s.Sweep.Queues.PendingRegionProvides)) + formatLine(1, "%sReprovide queue: %s regions", indent, humanNumber(s.Sweep.Queues.PendingRegionReprovides)) + addBlankLine(1) + } + + // Schedule + if all || schedule || brief { + sectionTitle(0, "Schedule") + formatLine(0, "%sCIDs scheduled: %s", indent, humanNumber(s.Sweep.Schedule.Keys)) + formatLine(0, "%sRegions scheduled: %s", indent, humanNumberOrNA(s.Sweep.Schedule.Regions)) + if !brief { + formatLine(0, "%sAvg prefix length: %s", indent, humanFloatOrNA(s.Sweep.Schedule.AvgPrefixLength)) + nextPrefix := key.BitString(s.Sweep.Schedule.NextReprovidePrefix) + if nextPrefix == "" { + nextPrefix = "N/A" + } + formatLine(0, "%sNext region prefix: %s", indent, nextPrefix) + nextReprovideAt := s.Sweep.Schedule.NextReprovideAt.Format("15:04:05") + if s.Sweep.Schedule.NextReprovideAt.IsZero() { + nextReprovideAt = "N/A" + } + formatLine(0, "%sNext region reprovide: %s", indent, nextReprovideAt) + } + addBlankLine(0) + } + + // Timings + if all || timings { + sectionTitle(1, "Timings") + formatLine(1, "%sUptime: %s (%s)", indent, humanDuration(s.Sweep.Timing.Uptime), humanTime(time.Now().Add(-s.Sweep.Timing.Uptime))) + formatLine(1, "%sCurrent time offset: %s", indent, humanDuration(s.Sweep.Timing.CurrentTimeOffset)) + formatLine(1, "%sCycle started: %s", indent, humanTime(s.Sweep.Timing.CycleStart)) + formatLine(1, "%sReprovide interval: %s", indent, humanDuration(s.Sweep.Timing.ReprovidesInterval)) + addBlankLine(1) + } + + // Network + if all || network || brief { + sectionTitle(0, "Network") + formatLine(0, "%sAvg record holders: %s", indent, humanFloatOrNA(s.Sweep.Network.AvgHolders)) + if !brief { + formatLine(0, "%sPeers swept: %s", indent, humanNumber(s.Sweep.Network.Peers)) + formatLine(0, "%sFull keyspace coverage: %t", indent, s.Sweep.Network.CompleteKeyspaceCoverage) + if 
s.Sweep.Network.Peers > 0 { + formatLine(0, "%sReachable peers: %s (%s%%)", indent, humanNumber(s.Sweep.Network.Reachable), humanNumber(100*s.Sweep.Network.Reachable/s.Sweep.Network.Peers)) + } else { + formatLine(0, "%sReachable peers: %s", indent, humanNumber(s.Sweep.Network.Reachable)) + } + formatLine(0, "%sAvg region size: %s", indent, humanFloatOrNA(s.Sweep.Network.AvgRegionSize)) + formatLine(0, "%sReplication factor: %s", indent, humanNumber(s.Sweep.Network.ReplicationFactor)) + addBlankLine(0) + } + } + + // Operations + if all || operations || brief { + sectionTitle(1, "Operations") + // Ongoing operations + formatLine(1, "%sOngoing provides: %s CIDs, %s regions", indent, humanNumber(s.Sweep.Operations.Ongoing.KeyProvides), humanNumber(s.Sweep.Operations.Ongoing.RegionProvides)) + formatLine(1, "%sOngoing reprovides: %s CIDs, %s regions", indent, humanNumber(s.Sweep.Operations.Ongoing.KeyReprovides), humanNumber(s.Sweep.Operations.Ongoing.RegionReprovides)) + // Past operations summary + formatLine(1, "%sTotal CIDs provided: %s", indent, humanNumber(s.Sweep.Operations.Past.KeysProvided)) + if !brief { + formatLine(1, "%sTotal records provided: %s", indent, humanNumber(s.Sweep.Operations.Past.RecordsProvided)) + formatLine(1, "%sTotal provide errors: %s", indent, humanNumber(s.Sweep.Operations.Past.KeysFailed)) + formatLine(1, "%sCIDs provided/min/worker: %s", indent, humanFloatOrNA(s.Sweep.Operations.Past.KeysProvidedPerMinute)) + formatLine(1, "%sCIDs reprovided/min/worker: %s", indent, humanFloatOrNA(s.Sweep.Operations.Past.KeysReprovidedPerMinute)) + formatLine(1, "%sRegion reprovide duration: %s", indent, humanDurationOrNA(s.Sweep.Operations.Past.RegionReprovideDuration)) + formatLine(1, "%sAvg CIDs/reprovide: %s", indent, humanFloatOrNA(s.Sweep.Operations.Past.AvgKeysPerReprovide)) + formatLine(1, "%sRegions reprovided (last cycle): %s", indent, humanNumber(s.Sweep.Operations.Past.RegionReprovidedLastCycle)) + addBlankLine(1) + } + } + + // Workers + displayWorkers := all || workers + if displayWorkers || brief { + availableReservedBurst := max(0, s.Sweep.Workers.DedicatedBurst-s.Sweep.Workers.ActiveBurst) + availableReservedPeriodic := max(0, s.Sweep.Workers.DedicatedPeriodic-s.Sweep.Workers.ActivePeriodic) + availableFreeWorkers := max(0, s.Sweep.Workers.Max-max(s.Sweep.Workers.DedicatedBurst, s.Sweep.Workers.ActiveBurst)-max(s.Sweep.Workers.DedicatedPeriodic, s.Sweep.Workers.ActivePeriodic)) + availableBurst := availableFreeWorkers + availableReservedBurst + availablePeriodic := availableFreeWorkers + availableReservedPeriodic + + if displayWorkers || availableBurst <= lowWorkerThreshold || availablePeriodic <= lowWorkerThreshold { + // Either we want to display workers information, or we are low on + // available workers and want to warn the user. 
+ sectionTitle(0, "Workers") + specifyWorkers := " workers" + if compactMode { + specifyWorkers = "" + } + formatLine(0, "%sActive%s: %s / %s (max)", indent, specifyWorkers, humanNumber(s.Sweep.Workers.Active), humanFull(float64(s.Sweep.Workers.Max), 0)) + if brief { + // Brief mode - show condensed worker info + formatLine(0, "%sPeriodic%s: %s active, %s available, %s queued", indent, specifyWorkers, + humanNumber(s.Sweep.Workers.ActivePeriodic), humanNumber(availablePeriodic), humanNumber(s.Sweep.Workers.QueuedPeriodic)) + formatLine(0, "%sBurst%s: %s active, %s available, %s queued\n", indent, specifyWorkers, + humanNumber(s.Sweep.Workers.ActiveBurst), humanNumber(availableBurst), humanNumber(s.Sweep.Workers.QueuedBurst)) + } else { + formatLine(0, "%sFree%s: %s", indent, specifyWorkers, humanNumber(availableFreeWorkers)) + formatLine(0, "%s %-14s %-9s %s", indent, "Workers stats:", "Periodic", "Burst") + formatLine(0, "%s %-14s %-9s %s", indent, "Active:", humanNumber(s.Sweep.Workers.ActivePeriodic), humanNumber(s.Sweep.Workers.ActiveBurst)) + formatLine(0, "%s %-14s %-9s %s", indent, "Dedicated:", humanNumber(s.Sweep.Workers.DedicatedPeriodic), humanNumber(s.Sweep.Workers.DedicatedBurst)) + formatLine(0, "%s %-14s %-9s %s", indent, "Available:", humanNumber(availablePeriodic), humanNumber(availableBurst)) + formatLine(0, "%s %-14s %-9s %s", indent, "Queued:", humanNumber(s.Sweep.Workers.QueuedPeriodic), humanNumber(s.Sweep.Workers.QueuedBurst)) + formatLine(0, "%sMax connections/worker: %s", indent, humanNumber(s.Sweep.Workers.MaxProvideConnsPerWorker)) + addBlankLine(0) + } + } + } + if compactMode { + col0Width := col0MaxWidth + 2 + // Print both columns side by side + maxRows := max(len(cols[0]), len(cols[1])) + if maxRows == 0 { + return nil + } + for i := range maxRows - 1 { // last line is empty + var left, right string + if i < len(cols[0]) { + left = cols[0][i] + } + if i < len(cols[1]) { + right = cols[1][i] + } + fmt.Fprintf(wtr, "%-*s %s\n", col0Width, left, right) + } + } else { + if !brief { + cols[0] = cols[0][:len(cols[0])-1] // remove last blank line + } + for _, line := range cols[0] { + fmt.Fprintln(wtr, line) } } return nil @@ -157,10 +515,23 @@ This interface is not stable and may change from release to release. } func humanDuration(val time.Duration) string { + if val > time.Second { + return val.Truncate(100 * time.Millisecond).String() + } return val.Truncate(time.Microsecond).String() } +func humanDurationOrNA(val time.Duration) string { + if val <= 0 { + return "N/A" + } + return humanDuration(val) +} + func humanTime(val time.Time) string { + if val.IsZero() { + return "N/A" + } return val.Format("2006-01-02 15:04:05") } @@ -174,6 +545,24 @@ func humanNumber[T constraints.Float | constraints.Integer](n T) string { return str } +// humanNumberOrNA is like humanNumber but returns "N/A" for non-positive values. +func humanNumberOrNA[T constraints.Float | constraints.Integer](n T) string { + if n <= 0 { + return "N/A" + } + return humanNumber(n) +} + +// humanFloatOrNA formats a float with 1 decimal place, returning "N/A" for non-positive values. +// This is separate from humanNumberOrNA because it provides simple decimal formatting for +// continuous metrics (averages, rates) rather than SI unit formatting used for discrete counts. 
+func humanFloatOrNA(val float64) string { + if val <= 0 { + return "N/A" + } + return humanFull(val, 1) +} + func humanSI(val float64, decimals int) string { v, unit := humanize.ComputeSI(val) return fmt.Sprintf("%s%s", humanFull(v, decimals), unit) diff --git a/core/node/provider.go b/core/node/provider.go index 2c77e580c..52de235c8 100644 --- a/core/node/provider.go +++ b/core/node/provider.go @@ -14,6 +14,7 @@ import ( "github.com/ipfs/boxo/provider" "github.com/ipfs/go-cid" "github.com/ipfs/go-datastore" + "github.com/ipfs/go-datastore/namespace" "github.com/ipfs/go-datastore/query" "github.com/ipfs/kubo/config" "github.com/ipfs/kubo/repo" @@ -36,13 +37,30 @@ import ( "go.uber.org/fx" ) -// The size of a batch that will be used for calculating average announcement -// time per CID, inside of boxo/provider.ThroughputReport -// and in 'ipfs stats provide' report. -const sampledBatchSize = 1000 +const ( + // The size of a batch that will be used for calculating average announcement + // time per CID, inside of boxo/provider.ThroughputReport + // and in 'ipfs stats provide' report. + // Used when Provide.DHT.SweepEnabled=false + sampledBatchSize = 1000 -// Datastore key used to store previous reprovide strategy. -const reprovideStrategyKey = "/reprovideStrategy" + // Datastore key used to store previous reprovide strategy. + reprovideStrategyKey = "/reprovideStrategy" + + // Datastore namespace prefix for provider data. + providerDatastorePrefix = "provider" + // Datastore path for the provider keystore. + keystoreDatastorePath = "keystore" +) + +// Interval between reprovide queue monitoring checks for slow reprovide alerts. +// Used when Provide.DHT.SweepEnabled=true +const reprovideAlertPollInterval = 15 * time.Minute + +// Number of consecutive polling intervals with sustained queue growth before +// triggering a slow reprovide alert (3 intervals = 45 minutes). +// Used when Provide.DHT.SweepEnabled=true +const consecutiveAlertsThreshold = 3 // DHTProvider is an interface for providing keys to a DHT swarm. 
It holds a // state of keys to be advertised, and is responsible for periodically @@ -314,10 +332,10 @@ func SweepingProviderOpt(cfg *config.Config) fx.Option { Repo repo.Repo } sweepingReprovider := fx.Provide(func(in providerInput) (DHTProvider, *keystore.ResettableKeystore, error) { - ds := in.Repo.Datastore() + ds := namespace.Wrap(in.Repo.Datastore(), datastore.NewKey(providerDatastorePrefix)) ks, err := keystore.NewResettableKeystore(ds, keystore.WithPrefixBits(16), - keystore.WithDatastorePath("/provider/keystore"), + keystore.WithDatastorePath(keystoreDatastorePath), keystore.WithBatchSize(int(cfg.Provide.DHT.KeystoreBatchSize.WithDefault(config.DefaultProvideDHTKeystoreBatchSize))), ) if err != nil { @@ -360,6 +378,8 @@ func SweepingProviderOpt(cfg *config.Config) fx.Option { if inDht != nil { prov, err := ddhtprovider.New(inDht, ddhtprovider.WithKeystore(ks), + ddhtprovider.WithDatastore(ds), + ddhtprovider.WithResumeCycle(cfg.Provide.DHT.ResumeEnabled.WithDefault(config.DefaultProvideDHTResumeEnabled)), ddhtprovider.WithReprovideInterval(reprovideInterval), ddhtprovider.WithMaxReprovideDelay(time.Hour), @@ -393,6 +413,8 @@ func SweepingProviderOpt(cfg *config.Config) fx.Option { } opts := []dhtprovider.Option{ dhtprovider.WithKeystore(ks), + dhtprovider.WithDatastore(ds), + dhtprovider.WithResumeCycle(cfg.Provide.DHT.ResumeEnabled.WithDefault(config.DefaultProvideDHTResumeEnabled)), dhtprovider.WithPeerID(impl.Host().ID()), dhtprovider.WithRouter(impl), dhtprovider.WithMessageSender(impl.MessageSender()), @@ -508,9 +530,127 @@ func SweepingProviderOpt(cfg *config.Config) fx.Option { }) }) + // extractSweepingProvider extracts a SweepingProvider from the given provider interface. + // It handles unwrapping buffered and dual providers, always selecting WAN for dual DHT. + // Returns nil if the provider is not a sweeping provider type. + var extractSweepingProvider func(prov any) *dhtprovider.SweepingProvider + extractSweepingProvider = func(prov any) *dhtprovider.SweepingProvider { + switch p := prov.(type) { + case *dhtprovider.SweepingProvider: + return p + case *ddhtprovider.SweepingProvider: + return p.WAN + case *buffered.SweepingProvider: + // Recursively extract from the inner provider + return extractSweepingProvider(p.Provider) + default: + return nil + } + } + + type alertInput struct { + fx.In + Provider DHTProvider + } + reprovideAlert := fx.Invoke(func(lc fx.Lifecycle, in alertInput) { + prov := extractSweepingProvider(in.Provider) + + var ( + cancel context.CancelFunc + done = make(chan struct{}) + ) + + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + if prov == nil { + return nil + } + gcCtx, c := context.WithCancel(context.Background()) + cancel = c + go func() { + defer close(done) + + ticker := time.NewTicker(reprovideAlertPollInterval) + defer ticker.Stop() + + var ( + queueSize, prevQueueSize int64 + queuedWorkers, prevQueuedWorkers bool + count int + ) + + for { + select { + case <-gcCtx.Done(): + return + case <-ticker.C: + } + + stats := prov.Stats() + queuedWorkers = stats.Workers.QueuedPeriodic > 0 + queueSize = int64(stats.Queues.PendingRegionReprovides) + + // Alert if reprovide queue keeps growing and all periodic workers are busy. + // Requires consecutiveAlertsThreshold intervals of sustained growth. 
+ if prevQueuedWorkers && queuedWorkers && queueSize > prevQueueSize { + count++ + if count >= consecutiveAlertsThreshold { + logger.Errorf(` +๐Ÿ””๐Ÿ””๐Ÿ”” Reprovide Operations Too Slow ๐Ÿ””๐Ÿ””๐Ÿ”” + +Your node is falling behind on DHT reprovides, which will affect content availability. + +Keyspace regions enqueued for reprovide: + %s ago:\t%d + Now:\t%d + +All periodic workers are busy! + Active workers:\t%d / %d (max) + Active workers types:\t%d periodic, %d burst + Dedicated workers:\t%d periodic, %d burst + +Solutions (try in order): +1. Increase Provide.DHT.MaxWorkers (current %d) +2. Increase Provide.DHT.DedicatedPeriodicWorkers (current %d) +3. Set Provide.DHT.SweepEnabled=false and Routing.AcceleratedDHTClient=true (last resort, not recommended) + +See how the reprovide queue is processed in real-time with 'watch ipfs provide stat --all --compact' + +See docs: https://github.com/ipfs/kubo/blob/master/docs/config.md#providedhtmaxworkers`, + reprovideAlertPollInterval.Truncate(time.Minute).String(), prevQueueSize, queueSize, + stats.Workers.Active, stats.Workers.Max, + stats.Workers.ActivePeriodic, stats.Workers.ActiveBurst, + stats.Workers.DedicatedPeriodic, stats.Workers.DedicatedBurst, + stats.Workers.Max, stats.Workers.DedicatedPeriodic) + } + } else if !queuedWorkers { + count = 0 + } + + prevQueueSize, prevQueuedWorkers = queueSize, queuedWorkers + } + }() + return nil + }, + OnStop: func(ctx context.Context) error { + // Cancel the alert loop + if cancel != nil { + cancel() + } + select { + case <-done: + case <-ctx.Done(): + return ctx.Err() + } + return nil + }, + }) + }) + return fx.Options( sweepingReprovider, initKeystore, + reprovideAlert, ) } diff --git a/docs/RELEASE_CHECKLIST.md b/docs/RELEASE_CHECKLIST.md index 8dbb771bf..da96a20d4 100644 --- a/docs/RELEASE_CHECKLIST.md +++ b/docs/RELEASE_CHECKLIST.md @@ -1,4 +1,4 @@ - + # โœ… Release Checklist (vX.Y.Z[-rcN]) @@ -80,18 +80,18 @@ If you're making a release for the first time, do pair programming and have the - [ ] Update [ipshipyard/waterworks-infra](https://github.com/ipshipyard/waterworks-infra) - [ ] Update Kubo staging environment ([Running Kubo tests on staging](https://www.notion.so/Running-Kubo-tests-on-staging-488578bb46154f9bad982e4205621af8)) - [ ] **RC:** Test last release against current RC - - [ ] **FINAL:** Test last release against current one - - [ ] Update collab cluster boxes to the tagged release - - [ ] Update libp2p bootstrappers to the tagged release + - [ ] **FINAL:** Latest release on both boxes + - [ ] **FINAL:** Update collab cluster boxes to the tagged release + - [ ] **FINAL:** Update libp2p bootstrappers to the tagged release - [ ] Smoke test with [IPFS Companion Browser Extension](https://docs.ipfs.tech/install/ipfs-companion/) - [ ] Update [ipfs-desktop](https://github.com/ipfs/ipfs-desktop) - [ ] Create PR updating kubo version in `package.json` and `package-lock.json` - - [ ] **FINAL only:** Merge and create/request new release + - [ ] **FINAL:** Merge PR and ship new ipfs-desktop release - [ ] **FINAL only:** Update [docs.ipfs.tech](https://docs.ipfs.tech/): run [update-on-new-ipfs-tag.yml](https://github.com/ipfs/ipfs-docs/actions/workflows/update-on-new-ipfs-tag.yml) workflow and merge the PR ### Promotion -- [ ] Create [IPFS Discourse](https://discuss.ipfs.tech) topic ([RC example](https://discuss.ipfs.tech/t/kubo-v0-16-0-rc1-release-candidate-is-out/15248), [FINAL example](https://discuss.ipfs.tech/t/kubo-v0-37-0-is-out/19673)) +- [ ] Create [IPFS 
Discourse](https://discuss.ipfs.tech) topic ([RC example](https://discuss.ipfs.tech/t/kubo-v0-38-0-rc2-is-out/19772), [FINAL example](https://discuss.ipfs.tech/t/kubo-v0-38-0-is-out/19795)) - [ ] Title: `Kubo vX.Y.Z(-rcN) is out!`, tag: `kubo` - [ ] Use title as heading (`##`) in description - [ ] Include: GitHub release link, IPNS binaries, docker pull command, release notes diff --git a/docs/changelogs/v0.39.md b/docs/changelogs/v0.39.md new file mode 100644 index 000000000..177aca4fd --- /dev/null +++ b/docs/changelogs/v0.39.md @@ -0,0 +1,164 @@ +# Kubo changelog v0.39 + + + +This release was brought to you by the [Shipyard](https://ipshipyard.com/) team. + +- [v0.39.0](#v0390) + +## v0.39.0 + +- [Overview](#overview) +- [๐Ÿ”ฆ Highlights](#-highlights) + - [๐Ÿ“Š Detailed statistics for Sweep provider with `ipfs provide stat`](#-detailed-statistics-for-sweep-provider-with-ipfs-provide-stat) + - [โฏ๏ธ Provider resume cycle for improved reproviding reliability](#provider-resume-cycle-for-improved-reproviding-reliability) + - [๐Ÿ”” Sweep provider slow reprovide warnings](#-sweep-provider-slow-reprovide-warnings) + - [๐Ÿ”ง Fixed UPnP port forwarding after router restarts](#-fixed-upnp-port-forwarding-after-router-restarts) + - [๐Ÿ–ฅ๏ธ RISC-V support with prebuilt binaries](#๏ธ-risc-v-support-with-prebuilt-binaries) + - [๐Ÿชฆ Deprecated `go-ipfs` name no longer published](#-deprecated-go-ipfs-name-no-longer-published) +- [๐Ÿ“ฆ๏ธ Important dependency updates](#-important-dependency-updates) +- [๐Ÿ“ Changelog](#-changelog) +- [๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ Contributors](#-contributors) + +### Overview + +### ๐Ÿ”ฆ Highlights + +#### ๐Ÿ“Š Detailed statistics for Sweep provider with `ipfs provide stat` + +The experimental Sweep provider system ([introduced in +v0.38](https://github.com/ipfs/kubo/blob/master/docs/changelogs/v0.38.md#-experimental-sweeping-dht-provider)) +now has detailed statistics available through `ipfs provide stat`. + +These statistics help you monitor provider health and troubleshoot issues, +especially useful for nodes providing large content collections. You can quickly +identify bottlenecks like queue backlog, worker saturation, or connectivity +problems that might prevent content from being announced to the DHT. + +**Default behavior:** Displays a brief summary showing queue sizes, scheduled +CIDs/regions, average record holders, ongoing/total provides, and worker status +when resources are constrained. + +**Detailed statistics with `--all`:** View complete metrics organized into sections: + +- **Connectivity**: DHT connection status +- **Queues**: Pending provide and reprovide operations +- **Schedule**: CIDs/regions scheduled for reprovide +- **Timings**: Uptime, reprovide cycle information +- **Network**: Peer statistics, keyspace region sizes +- **Operations**: Ongoing and past provides, rates, errors +- **Workers**: Worker pool utilization and availability + +**Real-time monitoring:** For continuous monitoring, run +`watch ipfs provide stat --all --compact` to see detailed statistics refreshed +in a 2-column layout. This lets you observe provide rates, queue sizes, and +worker availability in real-time. Individual sections can be displayed using +flags like `--network`, `--operations`, or `--workers`, and multiple flags can +be combined for custom views. + +**Dual DHT support:** For Dual DHT configurations, use `--lan` to view LAN DHT +provider statistics instead of the default WAN DHT stats. 
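
For a sense of when the brief output decides to surface the worker warning: the encoder adds each lane's idle reserved workers to the shared free pool, and warns once either lane's availability drops to `lowWorkerThreshold` (2) or below. Below is a small self-contained Go sketch of that arithmetic, mirroring the encoder logic in this PR; the `workers` struct and the example numbers are illustrative stand-ins for the `Workers` gauges returned by the sweep provider's `Stats()`, not the actual types.

```go
package main

import "fmt"

// workers mirrors the Workers gauges reported by the sweep provider's
// Stats(); this struct is illustrative, not the actual stats type.
type workers struct {
	Max               int // Provide.DHT.MaxWorkers (default 16)
	ActiveBurst       int
	ActivePeriodic    int
	DedicatedBurst    int // Provide.DHT.DedicatedBurstWorkers (default 1)
	DedicatedPeriodic int // Provide.DHT.DedicatedPeriodicWorkers (default 2)
}

const lowWorkerThreshold = 2 // warn when a lane has this many workers (or fewer) left

func main() {
	// Example: a burst of first-time provides has grabbed 12 workers.
	w := workers{Max: 16, ActiveBurst: 12, ActivePeriodic: 2, DedicatedBurst: 1, DedicatedPeriodic: 2}

	// Idle workers still reserved for each lane.
	reservedBurst := max(0, w.DedicatedBurst-w.ActiveBurst)          // 0
	reservedPeriodic := max(0, w.DedicatedPeriodic-w.ActivePeriodic) // 0

	// Shared free pool: capacity left after each lane's reservation or its
	// actual usage (whichever is larger) is accounted for.
	free := max(0, w.Max-max(w.DedicatedBurst, w.ActiveBurst)-max(w.DedicatedPeriodic, w.ActivePeriodic)) // 16-12-2 = 2

	availableBurst := free + reservedBurst       // 2
	availablePeriodic := free + reservedPeriodic // 2

	// true -> the brief summary shows the Workers section as a warning
	fmt.Println(availableBurst <= lowWorkerThreshold || availablePeriodic <= lowWorkerThreshold)
}
```

With the default pool (16 max, 2 dedicated periodic plus 1 dedicated burst), a burst of first-time provides can squeeze both lanes down to the threshold, which is exactly the situation the brief summary surfaces.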
+ +> [!NOTE] +> These statistics are only available when using the Sweep provider system +> (enabled via +> [`Provide.DHT.SweepEnabled`](https://github.com/ipfs/kubo/blob/master/docs/config.md#providedhtsweepenabled)). +> Legacy provider shows basic statistics without flag support. + +#### โฏ๏ธ Provider resume cycle for improved reproviding reliability + +When using the sweeping provider (`Provide.DHT.SweepEnabled`), Kubo now +persists the reprovide cycle state and automatically resumes where it left off +after a restart. This brings several improvements: + +- **Persistent progress**: The provider now saves its position in the reprovide +cycle to the datastore. On restart, it continues from where it stopped instead +of starting from scratch. +- **Catch-up reproviding**: If the node was offline for an extended period, all +CIDs that haven't been reprovided within the configured reprovide interval are +immediately queued for reproviding when the node starts up. This ensures +content availability is maintained even after downtime. +- **Persistent provide queue**: The provide queue is now persisted to the +datastore on shutdown. When the node restarts, queued CIDs are restored and +provided as expected, preventing loss of pending provide operations. +- **Resume control**: The resume behavior is now controlled via the +`Provide.DHT.ResumeEnabled` config option (default: `true`). If you don't want +to keep the persisted provider state from a previous run, you can set +`Provide.DHT.ResumeEnabled=false` in your config. + +This feature significantly improves the reliability of content providing, +especially for nodes that experience intermittent connectivity or restarts. + +#### ๐Ÿ”” Sweep provider slow reprovide warnings + +Kubo now monitors DHT reprovide operations when `Provide.DHT.SweepEnabled=true` +and alerts you if your node is falling behind on reprovides. + +When the reprovide queue consistently grows and all periodic workers are busy, +a warning is logged with: + +- Queue size and worker utilization details +- Recommended solutions: increase `Provide.DHT.MaxWorkers` or `Provide.DHT.DedicatedPeriodicWorkers` +- Command to monitor real-time progress: `watch ipfs provide stat --all --compact` + +The monitor polls every 15 minutes (to avoid alert fatigue while catching +persistent issues) and only triggers after sustained growth across multiple +intervals. The legacy provider is unaffected by this change. + +#### ๐Ÿ”ง Fixed UPnP port forwarding after router restarts + +Kubo now automatically recovers UPnP port mappings when routers restart or +become temporarily unavailable, fixing a critical connectivity issue that +affected self-hosted nodes behind NAT. + +**Previous behavior:** When a UPnP-enabled router restarted, Kubo would lose +its port mapping and fail to re-establish it automatically. Nodes would become +unreachable to the network until the daemon was manually restarted, forcing +reliance on relay connections which degraded performance. + +**New behavior:** The upgraded go-libp2p (v0.44.0) includes [Shipyard's fix](https://github.com/libp2p/go-libp2p/pull/3367) +for self-healing NAT mappings that automatically rediscover and re-establish +port forwarding after router events. Nodes now maintain public connectivity +without manual intervention.
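
Kubo drives this through the Swarm config rather than user code, but for readers experimenting with bare go-libp2p, the option involved is `NATPortMap`. A minimal sketch, assuming go-libp2p v0.44.0 or later; this is not kubo's actual wiring:

```go
package main

import (
	"fmt"
	"log"

	"github.com/libp2p/go-libp2p"
)

func main() {
	// NATPortMap asks the local router, via UPnP/NAT-PMP, to forward a port
	// to this host. Kubo enables the equivalent behavior by default; set
	// Swarm.DisableNatPortMap=true to turn it off.
	h, err := libp2p.New(libp2p.NATPortMap())
	if err != nil {
		log.Fatal(err)
	}
	defer h.Close()

	// With go-libp2p v0.44.0+ the mapping self-heals after a router restart;
	// older versions could silently lose it until the process restarted.
	for _, addr := range h.Addrs() {
		fmt.Println(addr)
	}
}
```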
+ +> [!NOTE] +> If your node runs behind a router and you haven't manually configured port +> forwarding, make sure [`Swarm.DisableNatPortMap=false`](https://github.com/ipfs/kubo/blob/master/docs/config.md#swarmdisablenatportmap) +> so UPnP can automatically handle port mapping (this is the default). + +This significantly improves reliability for desktop and self-hosted IPFS nodes +using UPnP for NAT traversal. + +#### ๐Ÿ–ฅ๏ธ RISC-V support with prebuilt binaries + +Kubo now provides official `linux-riscv64` prebuilt binaries with every release, +bringing IPFS to [RISC-V](https://en.wikipedia.org/wiki/RISC-V) open hardware. + +As RISC-V single-board computers and embedded systems become more accessible, +it's good to see the distributed web supported on open hardware architectures - +a natural pairing of open technologies. + +Download from https://dist.ipfs.tech or +https://github.com/ipfs/kubo/releases and look for the `linux-riscv64` archive. + +#### ๐Ÿชฆ Deprecated `go-ipfs` name no longer published + +The `go-ipfs` name was deprecated in 2022 and renamed to `kubo`. Starting with this release, we have stopped publishing Docker images and distribution binaries under the old `go-ipfs` name. + +Existing users should switch to: + +- Docker: `ipfs/kubo` image (instead of `ipfs/go-ipfs`) +- Binaries: download from https://dist.ipfs.tech or https://github.com/ipfs/kubo/releases + +For Docker users, the legacy `ipfs/go-ipfs` image name now shows a deprecation notice directing you to `ipfs/kubo`. + +### ๐Ÿ“ฆ๏ธ Important dependency updates + +- update `go-libp2p` to [v0.44.0](https://github.com/libp2p/go-libp2p/releases/tag/v0.44.0) with self-healing UPnP port mappings +- update `quic-go` to [v0.55.0](https://github.com/quic-go/quic-go/releases/tag/v0.55.0) +- update `go-ds-pebble` to [v0.5.6](https://github.com/ipfs/go-ds-pebble/releases/tag/v0.5.6) (includes pebble [v2.1.1](https://github.com/cockroachdb/pebble/releases/tag/v2.1.1)) +- update `boxo` to [v0.35.1](https://github.com/ipfs/boxo/releases/tag/v0.35.1) + +### ๐Ÿ“ Changelog + +### ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ Contributors diff --git a/docs/config.md b/docs/config.md index 7982cf7f8..8e0eb4dd5 100644 --- a/docs/config.md +++ b/docs/config.md @@ -132,6 +132,7 @@ config file at runtime. - [`Provide.DHT.MaxWorkers`](#providedhtmaxworkers) - [`Provide.DHT.Interval`](#providedhtinterval) - [`Provide.DHT.SweepEnabled`](#providedhtsweepenabled) + - [`Provide.DHT.ResumeEnabled`](#providedhtresumeenabled) - [`Provide.DHT.DedicatedPeriodicWorkers`](#providedhtdedicatedperiodicworkers) - [`Provide.DHT.DedicatedBurstWorkers`](#providedhtdedicatedburstworkers) - [`Provide.DHT.MaxProvideConnsPerWorker`](#providedhtmaxprovideconnsperworker) @@ -1910,10 +1911,17 @@ Type: `duration` ## `Provide` -Configures CID announcements to the routing system, including both immediate -announcements for new content (provide) and periodic re-announcements -(reprovide) on systems that require it, like Amino DHT. While designed to support -multiple routing systems in the future, the current default configuration only supports providing to the Amino DHT. +Configures how your node advertises content to make it discoverable by other +peers. + +**What is providing?** When your node stores content, it publishes provider +records to the routing system announcing "I have this content". These records +map CIDs to your peer ID, enabling content discovery across the network. + +While designed to support multiple routing systems in the future, the current +default configuration only supports [providing to the Amino DHT](#providedht).
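
For developers, the primitive underneath "providing" is the `ContentRouting` interface from go-libp2p core. The sketch below is purely illustrative (the helper names are hypothetical); kubo's provider system, legacy or sweep, decides when and how often to make these calls, as the sections below describe:

```go
package provideexample

import (
	"context"
	"fmt"

	"github.com/ipfs/go-cid"
	"github.com/libp2p/go-libp2p/core/routing"
)

// Announce publishes a provider record for c, telling the routing system
// "my peer ID has this content". Kubo batches and schedules these calls;
// this only shows the routing-layer primitive.
func Announce(ctx context.Context, r routing.ContentRouting, c cid.Cid) error {
	// true = announce to the network; false lets the router skip the
	// network announcement (router-dependent behavior).
	return r.Provide(ctx, c, true)
}

// FindProviders shows the flip side: peers discover content by asking the
// same routing system who has advertised a CID.
func FindProviders(ctx context.Context, r routing.ContentRouting, c cid.Cid) {
	for p := range r.FindProvidersAsync(ctx, c, 1) {
		fmt.Println("provider:", p.ID)
	}
}
```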
+ + ### `Provide.Enabled` @@ -1964,13 +1972,39 @@ Type: `optionalString` (unset for the default) Configuration for providing data to Amino DHT peers. +**Provider record lifecycle:** On the Amino DHT, provider records expire after +[`amino.DefaultProvideValidity`](https://github.com/libp2p/go-libp2p-kad-dht/blob/v0.34.0/amino/defaults.go#L40-L43). +Your node must re-announce (reprovide) content periodically to keep it +discoverable. The [`Provide.DHT.Interval`](#providedhtinterval) setting +controls this timing, with the default ensuring records refresh well before +expiration or negative churn effects kick in. + +**Two provider systems:** + +- **Sweep provider**: Divides the DHT keyspace into regions and systematically + sweeps through them over the reprovide interval. This batches CIDs allocated + to the same DHT servers, dramatically reducing the number of DHT lookups and + PUTs needed. Spreads work evenly over time with predictable resource usage. + +- **Legacy provider**: Processes each CID individually with separate DHT + lookups. Works well for small content collections but struggles to complete + reprovide cycles when managing thousands of CIDs. + #### Monitoring Provide Operations -You can monitor the effectiveness of your provide configuration through metrics exposed at the Prometheus endpoint: `{Addresses.API}/debug/metrics/prometheus` (default: `http://127.0.0.1:5001/debug/metrics/prometheus`). +**Quick command-line monitoring:** Use `ipfs provide stat` to view the current +state of the provider system. For real-time monitoring, run +`watch ipfs provide stat --all --compact` to see detailed statistics refreshed +continuously in a 2-column layout. -Different metrics are available depending on whether you use legacy mode (`SweepEnabled=false`) or sweep mode (`SweepEnabled=true`). See [Provide metrics documentation](https://github.com/ipfs/kubo/blob/master/docs/metrics.md#provide) for details. +**Long-term monitoring:** For in-depth or long-term monitoring, metrics are +exposed at the Prometheus endpoint: `{Addresses.API}/debug/metrics/prometheus` +(default: `http://127.0.0.1:5001/debug/metrics/prometheus`). Different metrics +are available depending on whether you use legacy mode (`SweepEnabled=false`) or +sweep mode (`SweepEnabled=true`). See [Provide metrics documentation](https://github.com/ipfs/kubo/blob/master/docs/metrics.md#provide) +for details. -To enable detailed debug logging for both providers, set: +**Debug logging:** For troubleshooting, enable detailed logging by setting: ```sh GOLOG_LOG_LEVEL=error,provider=debug,dht/provider=debug @@ -1982,12 +2016,24 @@ GOLOG_LOG_LEVEL=error,provider=debug,dht/provider=debug #### `Provide.DHT.Interval` Sets how often to re-announce content to the DHT. Provider records on Amino DHT -expire after [`amino.DefaultProvideValidity`](https://github.com/libp2p/go-libp2p-kad-dht/blob/v0.34.0/amino/defaults.go#L40-L43), -also known as Provider Record Expiration Interval. +expire after [`amino.DefaultProvideValidity`](https://github.com/libp2p/go-libp2p-kad-dht/blob/v0.34.0/amino/defaults.go#L40-L43). -An interval of about half the expiration window ensures provider records -are refreshed well before they expire. This keeps your content continuously -discoverable accounting for network churn without overwhelming the network with too frequent announcements. +**Why this matters:** The interval must be shorter than the expiration window to +ensure provider records refresh before they expire. 
The default value is +approximately half of [`amino.DefaultProvideValidity`](https://github.com/libp2p/go-libp2p-kad-dht/blob/v0.34.0/amino/defaults.go#L40-L43), +which accounts for network churn and ensures records stay alive without +overwhelming the network with unnecessary announcements. + +**With sweep mode enabled +([`Provide.DHT.SweepEnabled`](#providedhtsweepenabled)):** The system spreads +reprovide operations smoothly across this entire interval. Each keyspace region +is reprovided at scheduled times throughout the period, ensuring each region's +announcements complete before records expire. + +**With legacy mode:** The system attempts to reprovide all CIDs as quickly as +possible at the start of each interval. If reproviding takes longer than this +interval (common with large datasets), the next cycle is skipped and provider +records may expire. - If unset, it uses the implicit safe default. - If set to the value `"0"` it will disable content reproviding to DHT. @@ -2055,32 +2101,55 @@ Type: `optionalInteger` (non-negative; `0` means unlimited number of workers) #### `Provide.DHT.SweepEnabled` -Whether Provide Sweep is enabled. If not enabled, the legacy -[`boxo/provider`](https://github.com/ipfs/boxo/tree/main/provider) is used for -both provides and reprovides. +Enables the sweep provider for efficient content announcements. When disabled, +the legacy [`boxo/provider`](https://github.com/ipfs/boxo/tree/main/provider) is +used instead. -Provide Sweep is a resource efficient technique for advertising content to -the Amino DHT swarm. The Provide Sweep module tracks the keys that should be periodically reprovided in -the `Keystore`. It splits the keys into DHT keyspace regions by proximity (XOR -distance), and schedules when reprovides should happen in order to spread the -reprovide operation over time to avoid a spike in resource utilization. It -basically sweeps the keyspace _from left to right_ over the -[`Provide.DHT.Interval`](#providedhtinterval) time period, and reprovides keys -matching to the visited keyspace region. +**The legacy provider problem:** The legacy system processes CIDs one at a +time, requiring a separate DHT lookup (10-20 seconds each) to find the 20 +closest peers for each CID. This sequential approach typically handles fewer +than 10,000 CIDs over 22h ([`Provide.DHT.Interval`](#providedhtinterval)). If +your node has more CIDs than can be reprovided within +[`Provide.DHT.Interval`](#providedhtinterval), provider records start expiring +after +[`amino.DefaultProvideValidity`](https://github.com/libp2p/go-libp2p-kad-dht/blob/v0.34.0/amino/defaults.go#L40-L43), +making content undiscoverable. -Provide Sweep aims at replacing the inefficient legacy `boxo/provider` -module, and is currently opt-in. You can compare the effectiveness of sweep mode vs legacy mode by monitoring the appropriate metrics (see [Monitoring Provide Operations](#monitoring-provide-operations) above). +**How sweep mode works:** The sweep provider divides the DHT keyspace into +regions based on keyspace prefixes. It estimates the Amino DHT size, calculates +how many regions are needed (sized to contain at least 20 peers each), then +schedules region processing evenly across +[`Provide.DHT.Interval`](#providedhtinterval). When processing a region, it +discovers the peers in that region once, then sends all provider records for +CIDs allocated to those peers in a batch.
This batching is the key efficiency: +instead of N lookups for N CIDs, the number of lookups is bounded by a constant +fraction of the Amino DHT size (e.g., ~3,000 lookups when there are ~10,000 DHT +servers), regardless of how many CIDs you're providing. -Whenever new keys should be advertised to the Amino DHT, `kubo` calls -`StartProviding()`, triggering an initial `provide` operation for the given -keys. The keys will be added to the `Keystore` tracking which keys should be -reprovided and when they should be reprovided. Calling `StopProviding()` -removes the keys from the `Keystore`. However, it is currently tricky for -`kubo` to detect when a key should stop being advertised. Hence, `kubo` will -periodically refresh the `Keystore` at each [`Provide.DHT.Interval`](#providedhtinterval) -by providing it a channel of all the keys it is expected to contain according -to the [`Provide.Strategy`](#providestrategy). During this operation, -all keys in the `Keystore` are purged, and only the given ones remain scheduled. +**Efficiency gains:** For a node providing 100,000 CIDs, sweep mode reduces +lookups by 97% compared to legacy. The work spreads smoothly over time rather +than completing in bursts, preventing resource spikes and duplicate +announcements. Long-running nodes reprovide systematically just before records +would expire, keeping content continuously discoverable without wasting +bandwidth. + +**Implementation details:** The sweep provider tracks CIDs in a persistent +keystore. New content added via `StartProviding()` enters the provide queue and +gets batched by keyspace region. The keystore is periodically refreshed at each +[`Provide.DHT.Interval`](#providedhtinterval) with CIDs matching +[`Provide.Strategy`](#providestrategy) to ensure only current content remains +scheduled. This handles cases where content is unpinned or removed. + +**Persistent reprovide cycle state:** When Provide Sweep is enabled, the +reprovide cycle state is persisted to the datastore by default. On restart, Kubo +automatically resumes from where it left off. If the node was offline for an +extended period, all CIDs that haven't been reprovided within the configured +[`Provide.DHT.Interval`](#providedhtinterval) are immediately queued for +reproviding. Additionally, the provide queue is persisted on shutdown and +restored on startup, ensuring no pending provide operations are lost. If you +don't want to keep the persisted provider state from a previous run, you can +disable this behavior by setting [`Provide.DHT.ResumeEnabled`](#providedhtresumeenabled) +to `false`. > > @@ -2088,13 +2157,15 @@ all keys in the `Keystore` are purged, and only the given ones remain scheduled. 
> Reprovide Cycle Comparison > > -> The diagram above visualizes the performance patterns: > -> - **Legacy mode**: Individual (slow) provides per CID, can struggle with large datasets -> - **Sweep mode**: Even distribution matching the keyspace sweep described with low resource usage -> - **Accelerated DHT**: Hourly traffic spikes with high resource usage +> The diagram compares performance patterns: > +> - **Legacy mode**: Sequential processing, one lookup per CID, struggles with large datasets +> - **Sweep mode**: Smooth distribution over time, batched lookups by keyspace region, predictable resource usage +> - **Accelerated DHT**: Hourly network crawls creating traffic spikes, high resource usage > -> Sweep mode provides similar effectiveness to Accelerated DHT but with steady resource usage - better for machines with limited CPU, memory, or network bandwidth. +> Sweep mode achieves similar effectiveness to the Accelerated DHT client but with steady resource consumption.
+
+You can compare the effectiveness of sweep mode vs legacy mode by monitoring the appropriate metrics (see [Monitoring Provide Operations](#monitoring-provide-operations) above).

> [!NOTE]
> This feature is opt-in for now, but will become the default in a future release.

@@ -2104,9 +2175,42 @@ Default: `false`

Type: `flag`

+#### `Provide.DHT.ResumeEnabled`
+
+Controls whether the provider resumes from its previous state on restart. Only
+applies when `Provide.DHT.SweepEnabled` is true.
+
+When enabled (the default), the provider persists its reprovide cycle state and
+provide queue to the datastore, and restores them on restart. This ensures:
+
+- The reprovide cycle continues from where it left off instead of starting over
+- Any CIDs in the provide queue during shutdown are restored and provided after
+restart
+- CIDs that missed their reprovide window while the node was offline are queued
+for immediate reproviding
+
+When disabled, the provider starts fresh on each restart, discarding any
+previous reprovide cycle state and provide queue. On a fresh start, all CIDs
+matching the [`Provide.Strategy`](#providestrategy) will be provided ASAP (as
+burst provides), and then keyspace regions are reprovided according to the
+regular schedule starting from the beginning of the reprovide cycle.
+
+> [!NOTE]
+> Disabling this option means the provider will provide all content matching
+> your strategy on every restart (which can be resource-intensive for large
+> datasets), then start from the beginning of the reprovide cycle. For nodes
+> with large datasets or frequent restarts, keeping this enabled (the default)
+> is recommended for better resource efficiency and more consistent reproviding
+> behavior.
+
+Default: `true`
+
+Type: `flag`
+
#### `Provide.DHT.DedicatedPeriodicWorkers`

-Number of workers dedicated to periodic keyspace region reprovides. Only applies when `Provide.DHT.SweepEnabled` is true.
+Number of workers dedicated to periodic keyspace region reprovides. Only
+applies when `Provide.DHT.SweepEnabled` is true.

Among the [`Provide.DHT.MaxWorkers`](#providedhtmaxworkers), this number of
workers will be dedicated to the periodic region reprovide only. The sum of

@@ -2167,7 +2271,13 @@ from that keyspace region until all provider records are assigned.

This option defines how many such connections can be open concurrently by a
single worker.

-Default: `16`
+> [!NOTE]
+> Increasing this value can speed up the provide operation, at the cost of
+> opening more simultaneous connections to DHT servers. A keyspace region typically
A keyspace typically +> has less than 60 peers, so you may hit a performance ceiling beyond which +> increasing this value has no effect. + +Default: `20` Type: `optionalInteger` (non-negative) diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index ddd37c735..d77196d61 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -9,7 +9,7 @@ replace github.com/ipfs/kubo => ./../../.. require ( github.com/ipfs/boxo v0.35.1 github.com/ipfs/kubo v0.0.0-00010101000000-000000000000 - github.com/libp2p/go-libp2p v0.43.0 + github.com/libp2p/go-libp2p v0.44.0 github.com/multiformats/go-multiaddr v0.16.1 ) @@ -34,7 +34,7 @@ require ( github.com/cockroachdb/crlib v0.0.0-20241112164430-1264a2edc35b // indirect github.com/cockroachdb/errors v1.11.3 // indirect github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect - github.com/cockroachdb/pebble/v2 v2.1.0 // indirect + github.com/cockroachdb/pebble/v2 v2.1.1 // indirect github.com/cockroachdb/redact v1.1.5 // indirect github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 // indirect github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect @@ -82,7 +82,7 @@ require ( github.com/ipfs/go-ds-flatfs v0.5.5 // indirect github.com/ipfs/go-ds-leveldb v0.5.2 // indirect github.com/ipfs/go-ds-measure v0.2.2 // indirect - github.com/ipfs/go-ds-pebble v0.5.3 // indirect + github.com/ipfs/go-ds-pebble v0.5.6 // indirect github.com/ipfs/go-dsqueue v0.1.0 // indirect github.com/ipfs/go-fs-lock v0.1.1 // indirect github.com/ipfs/go-ipfs-cmds v0.15.0 // indirect @@ -115,7 +115,7 @@ require ( github.com/libp2p/go-doh-resolver v0.5.0 // indirect github.com/libp2p/go-flow-metrics v0.3.0 // indirect github.com/libp2p/go-libp2p-asn-util v0.4.1 // indirect - github.com/libp2p/go-libp2p-kad-dht v0.35.1 // indirect + github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32 // indirect github.com/libp2p/go-libp2p-kbucket v0.8.0 // indirect github.com/libp2p/go-libp2p-pubsub v0.14.2 // indirect github.com/libp2p/go-libp2p-pubsub-router v0.6.0 // indirect diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index 1b4fb7c8e..0fe61d434 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -95,8 +95,8 @@ github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZe github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= -github.com/cockroachdb/pebble/v2 v2.1.0 h1:6KZvjSpWcEXZUvlLzTRC7T1A2G7r+bFskIzggklxixo= -github.com/cockroachdb/pebble/v2 v2.1.0/go.mod h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= +github.com/cockroachdb/pebble/v2 v2.1.1 h1:sUpUJjorLDSL4zIRFqoduCBaf2LewaMUXOoOpK+MrXQ= +github.com/cockroachdb/pebble/v2 v2.1.1/go.mod h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 h1:Nua446ru3juLHLZd4AwKNzClZgL1co3pUPGv3o8FlcA= @@ -321,8 +321,8 @@ github.com/ipfs/go-ds-leveldb v0.5.2 
h1:6nmxlQ2zbp4LCNdJVsmHfs9GP0eylfBNxpmY1csp github.com/ipfs/go-ds-leveldb v0.5.2/go.mod h1:2fAwmcvD3WoRT72PzEekHBkQmBDhc39DJGoREiuGmYo= github.com/ipfs/go-ds-measure v0.2.2 h1:4kwvBGbbSXNYe4ANlg7qTIYoZU6mNlqzQHdVqICkqGI= github.com/ipfs/go-ds-measure v0.2.2/go.mod h1:b/87ak0jMgH9Ylt7oH0+XGy4P8jHx9KG09Qz+pOeTIs= -github.com/ipfs/go-ds-pebble v0.5.3 h1:4esRt82+LkenUnIWyUCghR1gzRfqeCYGGKX/hRmabro= -github.com/ipfs/go-ds-pebble v0.5.3/go.mod h1:pn2bxYkAE7JRkbAF7D8xuEEFD3oOQ7QqQZPWkAVBs58= +github.com/ipfs/go-ds-pebble v0.5.6 h1:mxTlfqILdcVue/hbuYpF89ihrRz8qcv0YWk2UsdGC3c= +github.com/ipfs/go-ds-pebble v0.5.6/go.mod h1:q5C8c138Y/jWIJ6sfOnpKHkhJ4vVVI+e5J8OafhEr3U= github.com/ipfs/go-dsqueue v0.1.0 h1:OrahKDtT/Q+iMgKaM9XWdxrYPVASFpTuLah8QpKjboc= github.com/ipfs/go-dsqueue v0.1.0/go.mod h1:iLNkodSOSKTLn0gCvL9ikArz5rZfNh8F9/BRvHe7RbY= github.com/ipfs/go-fs-lock v0.1.1 h1:TecsP/Uc7WqYYatasreZQiP9EGRy4ZnKoG4yXxR33nw= @@ -424,14 +424,14 @@ github.com/libp2p/go-flow-metrics v0.0.1/go.mod h1:Iv1GH0sG8DtYN3SVJ2eG221wMiNpZ github.com/libp2p/go-flow-metrics v0.0.3/go.mod h1:HeoSNUrOJVK1jEpDqVEiUOIXqhbnS27omG0uWU5slZs= github.com/libp2p/go-flow-metrics v0.3.0 h1:q31zcHUvHnwDO0SHaukewPYgwOBSxtt830uJtUx6784= github.com/libp2p/go-flow-metrics v0.3.0/go.mod h1:nuhlreIwEguM1IvHAew3ij7A8BMlyHQJ279ao24eZZo= -github.com/libp2p/go-libp2p v0.43.0 h1:b2bg2cRNmY4HpLK8VHYQXLX2d3iND95OjodLFymvqXU= -github.com/libp2p/go-libp2p v0.43.0/go.mod h1:IiSqAXDyP2sWH+J2gs43pNmB/y4FOi2XQPbsb+8qvzc= +github.com/libp2p/go-libp2p v0.44.0 h1:5Gtt8OrF8yiXmH+Mx4+/iBeFRMK1TY3a8OrEBDEqAvs= +github.com/libp2p/go-libp2p v0.44.0/go.mod h1:NovCojezAt4dnDd4fH048K7PKEqH0UFYYqJRjIIu8zc= github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94= github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8= github.com/libp2p/go-libp2p-core v0.2.4/go.mod h1:STh4fdfa5vDYr0/SzYYeqnt+E6KfEV5VxfIrm0bcI0g= github.com/libp2p/go-libp2p-core v0.3.0/go.mod h1:ACp3DmS3/N64c2jDzcV429ukDpicbL6+TrrxANBjPGw= -github.com/libp2p/go-libp2p-kad-dht v0.35.1 h1:RQglhc9OxqDwlFFdhQMwKxIPBIBfGsleROnK5hqVsoE= -github.com/libp2p/go-libp2p-kad-dht v0.35.1/go.mod h1:1oCXzkkBiYh3d5cMWLpInSOZ6am2AlpC4G+GDcZFcE0= +github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32 h1:xZj18PsLD157snR/BFo547jwOkGDH7jZjMEkBDOoD4Q= +github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32/go.mod h1:aHMTg23iseX9grGSfA5gFUzLrqzmYbA8PqgGPqM8VkI= github.com/libp2p/go-libp2p-kbucket v0.3.1/go.mod h1:oyjT5O7tS9CQurok++ERgc46YLwEpuGoFq9ubvoUOio= github.com/libp2p/go-libp2p-kbucket v0.8.0 h1:QAK7RzKJpYe+EuSEATAaaHYMYLkPDGC18m9jxPLnU8s= github.com/libp2p/go-libp2p-kbucket v0.8.0/go.mod h1:JMlxqcEyKwO6ox716eyC0hmiduSWZZl6JY93mGaaqc4= @@ -465,6 +465,8 @@ github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/marcopolo/simnet v0.0.1 h1:rSMslhPz6q9IvJeFWDoMGxMIrlsbXau3NkuIXHGJxfg= +github.com/marcopolo/simnet v0.0.1/go.mod h1:WDaQkgLAjqDUEBAOXz22+1j6wXKfGlC5sD5XWt3ddOs= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd h1:br0buuQ854V8u83wA0rVZ8ttrq5CpaPZdvrK0LP2lOk= github.com/marten-seemann/tcp 
v0.0.0-20210406111302-dfbc87cc63fd/go.mod h1:QuCEs1Nt24+FYQEqAAncTDPJIuGs+LxK1MCiFL25pMU= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= diff --git a/docs/provide-stats.md b/docs/provide-stats.md new file mode 100644 index 000000000..cf9a9f057 --- /dev/null +++ b/docs/provide-stats.md @@ -0,0 +1,208 @@ +# Provide Stats + +The `ipfs provide stat` command gives you statistics about your local provide +system. This file provides a detailed explanation of the metrics reported by +this command. + +## Connectivity + +### Status + +Current connectivity status (`online`, `disconnected`, or `offline`) and when +it last changed (see [provide connectivity +status](./config.md#providedhtofflinedelay)). + +## Queues + +### Provide queue + +Number of CIDs waiting for initial provide, and the number of keyspace regions +they're grouped into. + +### Reprovide queue + +Number of regions with overdue reprovides. These regions missed their scheduled +reprovide time and will be processed as soon as possible. If decreasing, the +node is recovering from downtime. If increasing, either the node is offline or +the provide system needs more workers (see +[`Provide.DHT.MaxWorkers`](./config.md#providedhtmaxworkers) +and +[`Provide.DHT.DedicatedPeriodicWorkers`](./config.md#providedhtdedicatedperiodicworkers)). + +## Schedule + +### CIDs scheduled + +Total CIDs scheduled for reprovide. + +### Regions scheduled + +Number of keyspace regions scheduled for reprovide. Each CID is mapped to a +specific region, and all CIDs within the same region are reprovided together as +a batch for efficient processing. + +### Avg prefix length + +Average length of binary prefixes identifying the scheduled regions. Each +keyspace region is identified by a binary prefix, and this shows the average +prefix length across all regions in the schedule. Longer prefixes indicate the +keyspace is divided into more regions (because there are more DHT servers in the +swarm to distribute records across). + +### Next region prefix + +Keyspace prefix of the next region to be reprovided. + +### Next region reprovide + +When the next region is scheduled to be reprovided. + +## Timings + +### Uptime + +How long the provide system has been running since Kubo started, along with the +start timestamp. + +### Current time offset + +Elapsed time in the current reprovide cycle, showing cycle progress (e.g., '11h' +means 11 hours into a 22-hour cycle, roughly halfway through). + +### Cycle started + +When the current reprovide cycle began. + +### Reprovide interval + +How often each CID is reprovided (the complete cycle duration). + +## Network + +### Avg record holders + +Average number of provider records successfully sent for each CID to distinct +DHT servers. In practice, this is often lower than the [replication +factor](#replication-factor) due to unreachable peers or timeouts. Matching the +replication factor would indicate all DHT servers are reachable. + +Note: this counts successful sends; some DHT servers may have gone offline +afterward, so actual availability may be lower. + +### Peers swept + +Number of DHT servers to which we tried to send provider records in the last +reprovide cycle (sweep). Excludes peers contacted during initial provides or +DHT lookups. + +### Full keyspace coverage + +Whether provider records were sent to all DHT servers in the swarm during the +last reprovide cycle. 
If true, [peers swept](#peers-swept) approximates the +total DHT swarm size over the last [reprovide interval](#reprovide-interval). + +### Reachable peers + +Number and percentage of peers to which we successfully sent all provider +records assigned to them during the last reprovide cycle. + +### Avg region size + +Average number of DHT servers per keyspace region. + +### Replication factor + +Target number of DHT servers to receive each provider record. + +## Operations + +### Ongoing provides + +Number of CIDs and regions currently being provided for the first time. More +CIDs than regions indicates efficient batching. Each region provide uses a +[burst +worker](./config.md#providedhtdedicatedburstworkers). + +### Ongoing reprovides + +Number of CIDs and regions currently being reprovided. Each region reprovide +uses a [periodic +worker](./config.md#providedhtdedicatedperiodicworkers). + +### Total CIDs provided + +Total number of provide operations since node startup (includes both provides +and reprovides). + +### Total records provided + +Total provider records successfully sent to DHT servers since startup (includes +reprovides). + +### Total provide errors + +Number of failed region provide/reprovide operations since startup. Failed +regions are automatically retried unless the node is offline. + +### CIDs provided/min/worker + +Average rate of initial provides per minute per worker during the last +reprovide cycle (excludes reprovides). Each worker handles one keyspace region +at a time, providing all CIDs in that region. This rate only counts active time +(timer doesn't run when no initial provides are being processed). The overall +provide rate can be higher when multiple workers are providing different +regions concurrently. + +### CIDs reprovided/min/worker + +Average rate of reprovides per minute per worker during the last reprovide +cycle (excludes initial provides). Each worker handles one keyspace region at a +time, reproviding all CIDs in that region. The overall reprovide rate can be +higher when multiple workers are reproviding different regions concurrently. To +estimate total reprovide rate, multiply by the number of [periodic +workers](./config.md#providedhtdedicatedperiodicworkers) in use. + +### Region reprovide duration + +Average time to reprovide all CIDs in a region during the last cycle. + +### Avg CIDs/reprovide + +Average number of CIDs per region during the last reprovide cycle. + +### Regions reprovided (last cycle) + +Number of regions reprovided in the last cycle. + +## Workers + +### Active workers + +Number of workers currently processing provide or reprovide operations. + +### Free workers + +Number of idle workers not reserved for periodic or burst tasks. + +### Workers stats + +Breakdown of worker status by type (periodic for scheduled reprovides, burst for +initial provides). For each type: + +- **Active**: Currently processing operations +- **Dedicated**: Reserved for this type +- **Available**: Idle dedicated workers + [free workers](#free-workers) +- **Queued**: 0 or 1 (workers acquired only when needed) + +See [provide queue](#provide-queue) and [reprovide queue](#reprovide-queue) for +regions waiting to be processed. + +### Max connections/worker + +Maximum concurrent DHT server connections per worker when sending provider +records for a region. 
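+
+As a rough sketch of how these stats might be consumed in practice (assuming a
+running daemon with `Provide.DHT.SweepEnabled=true` and, for the JSON example,
+`jq` installed; `Sweep.schedule.keys` is the JSON field exercised by this
+changeset's test suite):
+
+```sh
+# Human-readable summary of all sections
+ipfs provide stat --all
+
+# Machine-readable output; e.g. extract the number of scheduled CIDs
+ipfs provide stat --enc=json | jq '.Sweep.schedule.keys'
+
+# Compact 2-column view, refreshed by watch every 2 seconds
+watch ipfs provide stat --all --compact
+```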
+ +## See Also + +- [Provide configuration reference](./config.md#provide) +- [Provide metrics for Prometheus](./metrics.md#provide) diff --git a/fuse/ipns/ipns_unix.go b/fuse/ipns/ipns_unix.go index f291c9470..44085e526 100644 --- a/fuse/ipns/ipns_unix.go +++ b/fuse/ipns/ipns_unix.go @@ -528,13 +528,6 @@ func (d *Directory) Rename(ctx context.Context, req *fuse.RenameRequest, newDir return nil } -func min(a, b int) int { - if a < b { - return a - } - return b -} - // to check that out Node implements all the interfaces we want. type ipnsRoot interface { fs.Node diff --git a/go.mod b/go.mod index 888bef3b5..a24c63ca2 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/cenkalti/backoff/v4 v4.3.0 github.com/ceramicnetwork/go-dag-jose v0.1.1 github.com/cheggaaa/pb v1.0.29 - github.com/cockroachdb/pebble/v2 v2.1.0 + github.com/cockroachdb/pebble/v2 v2.1.1 github.com/coreos/go-systemd/v22 v22.5.0 github.com/dustin/go-humanize v1.0.1 github.com/elgris/jsondiff v0.0.0-20160530203242-765b5c24c302 @@ -32,7 +32,7 @@ require ( github.com/ipfs/go-ds-flatfs v0.5.5 github.com/ipfs/go-ds-leveldb v0.5.2 github.com/ipfs/go-ds-measure v0.2.2 - github.com/ipfs/go-ds-pebble v0.5.3 + github.com/ipfs/go-ds-pebble v0.5.6 github.com/ipfs/go-fs-lock v0.1.1 github.com/ipfs/go-ipfs-cmds v0.15.0 github.com/ipfs/go-ipld-cbor v0.2.1 @@ -51,9 +51,9 @@ require ( github.com/jbenet/go-temp-err-catcher v0.1.0 github.com/julienschmidt/httprouter v1.3.0 github.com/libp2p/go-doh-resolver v0.5.0 - github.com/libp2p/go-libp2p v0.43.0 + github.com/libp2p/go-libp2p v0.44.0 github.com/libp2p/go-libp2p-http v0.5.0 - github.com/libp2p/go-libp2p-kad-dht v0.35.1 + github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32 github.com/libp2p/go-libp2p-kbucket v0.8.0 github.com/libp2p/go-libp2p-pubsub v0.14.2 github.com/libp2p/go-libp2p-pubsub-router v0.6.0 @@ -69,6 +69,7 @@ require ( github.com/multiformats/go-multihash v0.2.3 github.com/opentracing/opentracing-go v1.2.0 github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 + github.com/probe-lab/go-libdht v0.3.0 github.com/prometheus/client_golang v1.23.2 github.com/stretchr/testify v1.11.1 github.com/syndtr/goleveldb v1.0.1-0.20220721030215-126854af5e6d @@ -215,7 +216,6 @@ require ( github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/polydawn/refmt v0.89.0 // indirect - github.com/probe-lab/go-libdht v0.3.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect github.com/prometheus/common v0.66.1 // indirect github.com/prometheus/procfs v0.17.0 // indirect diff --git a/go.sum b/go.sum index b6d8b7e64..5791d79c9 100644 --- a/go.sum +++ b/go.sum @@ -126,8 +126,8 @@ github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZe github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= -github.com/cockroachdb/pebble/v2 v2.1.0 h1:6KZvjSpWcEXZUvlLzTRC7T1A2G7r+bFskIzggklxixo= -github.com/cockroachdb/pebble/v2 v2.1.0/go.mod h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= +github.com/cockroachdb/pebble/v2 v2.1.1 h1:sUpUJjorLDSL4zIRFqoduCBaf2LewaMUXOoOpK+MrXQ= +github.com/cockroachdb/pebble/v2 v2.1.1/go.mod 
h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 h1:Nua446ru3juLHLZd4AwKNzClZgL1co3pUPGv3o8FlcA= @@ -388,8 +388,8 @@ github.com/ipfs/go-ds-leveldb v0.5.2 h1:6nmxlQ2zbp4LCNdJVsmHfs9GP0eylfBNxpmY1csp github.com/ipfs/go-ds-leveldb v0.5.2/go.mod h1:2fAwmcvD3WoRT72PzEekHBkQmBDhc39DJGoREiuGmYo= github.com/ipfs/go-ds-measure v0.2.2 h1:4kwvBGbbSXNYe4ANlg7qTIYoZU6mNlqzQHdVqICkqGI= github.com/ipfs/go-ds-measure v0.2.2/go.mod h1:b/87ak0jMgH9Ylt7oH0+XGy4P8jHx9KG09Qz+pOeTIs= -github.com/ipfs/go-ds-pebble v0.5.3 h1:4esRt82+LkenUnIWyUCghR1gzRfqeCYGGKX/hRmabro= -github.com/ipfs/go-ds-pebble v0.5.3/go.mod h1:pn2bxYkAE7JRkbAF7D8xuEEFD3oOQ7QqQZPWkAVBs58= +github.com/ipfs/go-ds-pebble v0.5.6 h1:mxTlfqILdcVue/hbuYpF89ihrRz8qcv0YWk2UsdGC3c= +github.com/ipfs/go-ds-pebble v0.5.6/go.mod h1:q5C8c138Y/jWIJ6sfOnpKHkhJ4vVVI+e5J8OafhEr3U= github.com/ipfs/go-dsqueue v0.1.0 h1:OrahKDtT/Q+iMgKaM9XWdxrYPVASFpTuLah8QpKjboc= github.com/ipfs/go-dsqueue v0.1.0/go.mod h1:iLNkodSOSKTLn0gCvL9ikArz5rZfNh8F9/BRvHe7RbY= github.com/ipfs/go-fs-lock v0.1.1 h1:TecsP/Uc7WqYYatasreZQiP9EGRy4ZnKoG4yXxR33nw= @@ -504,8 +504,8 @@ github.com/libp2p/go-flow-metrics v0.0.1/go.mod h1:Iv1GH0sG8DtYN3SVJ2eG221wMiNpZ github.com/libp2p/go-flow-metrics v0.0.3/go.mod h1:HeoSNUrOJVK1jEpDqVEiUOIXqhbnS27omG0uWU5slZs= github.com/libp2p/go-flow-metrics v0.3.0 h1:q31zcHUvHnwDO0SHaukewPYgwOBSxtt830uJtUx6784= github.com/libp2p/go-flow-metrics v0.3.0/go.mod h1:nuhlreIwEguM1IvHAew3ij7A8BMlyHQJ279ao24eZZo= -github.com/libp2p/go-libp2p v0.43.0 h1:b2bg2cRNmY4HpLK8VHYQXLX2d3iND95OjodLFymvqXU= -github.com/libp2p/go-libp2p v0.43.0/go.mod h1:IiSqAXDyP2sWH+J2gs43pNmB/y4FOi2XQPbsb+8qvzc= +github.com/libp2p/go-libp2p v0.44.0 h1:5Gtt8OrF8yiXmH+Mx4+/iBeFRMK1TY3a8OrEBDEqAvs= +github.com/libp2p/go-libp2p v0.44.0/go.mod h1:NovCojezAt4dnDd4fH048K7PKEqH0UFYYqJRjIIu8zc= github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94= github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8= github.com/libp2p/go-libp2p-core v0.2.4/go.mod h1:STh4fdfa5vDYr0/SzYYeqnt+E6KfEV5VxfIrm0bcI0g= @@ -514,8 +514,8 @@ github.com/libp2p/go-libp2p-gostream v0.6.0 h1:QfAiWeQRce6pqnYfmIVWJFXNdDyfiR/qk github.com/libp2p/go-libp2p-gostream v0.6.0/go.mod h1:Nywu0gYZwfj7Jc91PQvbGU8dIpqbQQkjWgDuOrFaRdA= github.com/libp2p/go-libp2p-http v0.5.0 h1:+x0AbLaUuLBArHubbbNRTsgWz0RjNTy6DJLOxQ3/QBc= github.com/libp2p/go-libp2p-http v0.5.0/go.mod h1:glh87nZ35XCQyFsdzZps6+F4HYI6DctVFY5u1fehwSg= -github.com/libp2p/go-libp2p-kad-dht v0.35.1 h1:RQglhc9OxqDwlFFdhQMwKxIPBIBfGsleROnK5hqVsoE= -github.com/libp2p/go-libp2p-kad-dht v0.35.1/go.mod h1:1oCXzkkBiYh3d5cMWLpInSOZ6am2AlpC4G+GDcZFcE0= +github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32 h1:xZj18PsLD157snR/BFo547jwOkGDH7jZjMEkBDOoD4Q= +github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32/go.mod h1:aHMTg23iseX9grGSfA5gFUzLrqzmYbA8PqgGPqM8VkI= github.com/libp2p/go-libp2p-kbucket v0.3.1/go.mod h1:oyjT5O7tS9CQurok++ERgc46YLwEpuGoFq9ubvoUOio= github.com/libp2p/go-libp2p-kbucket v0.8.0 h1:QAK7RzKJpYe+EuSEATAaaHYMYLkPDGC18m9jxPLnU8s= github.com/libp2p/go-libp2p-kbucket v0.8.0/go.mod h1:JMlxqcEyKwO6ox716eyC0hmiduSWZZl6JY93mGaaqc4= @@ -551,6 +551,8 @@ github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm 
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/marcopolo/simnet v0.0.1 h1:rSMslhPz6q9IvJeFWDoMGxMIrlsbXau3NkuIXHGJxfg= +github.com/marcopolo/simnet v0.0.1/go.mod h1:WDaQkgLAjqDUEBAOXz22+1j6wXKfGlC5sD5XWt3ddOs= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd h1:br0buuQ854V8u83wA0rVZ8ttrq5CpaPZdvrK0LP2lOk= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd/go.mod h1:QuCEs1Nt24+FYQEqAAncTDPJIuGs+LxK1MCiFL25pMU= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= diff --git a/repo/common/common.go b/repo/common/common.go index 6a6dee6dd..7c82f3ec3 100644 --- a/repo/common/common.go +++ b/repo/common/common.go @@ -2,6 +2,7 @@ package common import ( "fmt" + "maps" "strings" ) @@ -65,9 +66,9 @@ func MapSetKV(v map[string]interface{}, key string, value interface{}) error { // child maps until a non-map value is found. func MapMergeDeep(left, right map[string]interface{}) map[string]interface{} { // We want to alter a copy of the map, not the original - result := make(map[string]interface{}) - for k, v := range left { - result[k] = v + result := maps.Clone(left) + if result == nil { + result = make(map[string]interface{}) } for key, rightVal := range right { diff --git a/routing/delegated.go b/routing/delegated.go index fb89e8150..9f6a39667 100644 --- a/routing/delegated.go +++ b/routing/delegated.go @@ -6,6 +6,8 @@ import ( "errors" "fmt" "net/http" + "path" + "strings" drclient "github.com/ipfs/boxo/routing/http/client" "github.com/ipfs/boxo/routing/http/contentrouter" @@ -24,6 +26,7 @@ import ( "github.com/libp2p/go-libp2p/core/routing" ma "github.com/multiformats/go-multiaddr" "go.opencensus.io/stats/view" + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) var log = logging.Logger("routing/delegated") @@ -187,8 +190,27 @@ func httpRoutingFromConfig(conf config.Router, extraHTTP *ExtraHTTPParams) (rout delegateHTTPClient := &http.Client{ Transport: &drclient.ResponseBodyLimitedTransport{ - RoundTripper: transport, - LimitBytes: 1 << 20, + RoundTripper: otelhttp.NewTransport(transport, + otelhttp.WithSpanNameFormatter(func(operation string, req *http.Request) string { + if req.Method == http.MethodGet { + switch { + case strings.HasPrefix(req.URL.Path, "/routing/v1/providers"): + return "DelegatedHTTPClient.FindProviders" + case strings.HasPrefix(req.URL.Path, "/routing/v1/peers"): + return "DelegatedHTTPClient.FindPeers" + case strings.HasPrefix(req.URL.Path, "/routing/v1/ipns"): + return "DelegatedHTTPClient.GetIPNS" + } + } else if req.Method == http.MethodPut { + switch { + case strings.HasPrefix(req.URL.Path, "/routing/v1/ipns"): + return "DelegatedHTTPClient.PutIPNS" + } + } + return "DelegatedHTTPClient." 
+ path.Dir(req.URL.Path)
+				}),
+			),
+			LimitBytes: 1 << 20,
+		},
+	}

diff --git a/test/cli/provide_stats_test.go b/test/cli/provide_stats_test.go
new file mode 100644
index 000000000..fede31c0f
--- /dev/null
+++ b/test/cli/provide_stats_test.go
@@ -0,0 +1,524 @@
+package cli
+
+import (
+	"bufio"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/ipfs/kubo/test/cli/harness"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+const (
+	provideStatEventuallyTimeout = 15 * time.Second
+	provideStatEventuallyTick    = 100 * time.Millisecond
+)
+
+// sweepStats mirrors the subset of JSON fields actually used by tests.
+// This type is intentionally independent from upstream types to detect breaking changes.
+// Only includes fields that tests actually access to keep it simple and maintainable.
+type sweepStats struct {
+	Sweep struct {
+		Closed       bool `json:"closed"`
+		Connectivity struct {
+			Status string `json:"status"`
+		} `json:"connectivity"`
+		Queues struct {
+			PendingKeyProvides int `json:"pending_key_provides"`
+		} `json:"queues"`
+		Schedule struct {
+			Keys int `json:"keys"`
+		} `json:"schedule"`
+	} `json:"Sweep"`
+}
+
+// parseSweepStats parses JSON output from ipfs provide stat command.
+// Tests will naturally fail if upstream removes/renames fields we depend on.
+func parseSweepStats(t *testing.T, jsonOutput string) sweepStats {
+	t.Helper()
+	var stats sweepStats
+	err := json.Unmarshal([]byte(jsonOutput), &stats)
+	require.NoError(t, err, "failed to parse provide stat JSON output")
+	return stats
+}
+
+// TestProvideStatAllMetricsDocumented verifies that all metrics output by
+// `ipfs provide stat --all` are documented in docs/provide-stats.md.
+//
+// The test works as follows:
+// 1. Starts an IPFS node with Provide.DHT.SweepEnabled=true
+// 2. Runs `ipfs provide stat --all` to get all metrics
+// 3. Parses the output and extracts all lines with exactly 2 spaces indent
+//    (these are the actual metric lines)
+// 4. Reads docs/provide-stats.md and extracts all ### section headers
+// 5. Ensures every metric in the output has a corresponding ### section in the docs
+func TestProvideStatAllMetricsDocumented(t *testing.T) {
+	t.Parallel()
+
+	h := harness.NewT(t)
+	node := h.NewNode().Init()
+
+	// Enable sweep provider
+	node.SetIPFSConfig("Provide.DHT.SweepEnabled", true)
+	node.SetIPFSConfig("Provide.Enabled", true)
+
+	node.StartDaemon()
+	defer node.StopDaemon()
+
+	// Run `ipfs provide stat --all` to get all metrics
+	res := node.IPFS("provide", "stat", "--all")
+	require.NoError(t, res.Err)
+
+	// Parse metrics from the command output
+	// Only consider lines with exactly two spaces of padding ("  ")
+	// These are the actual metric lines as shown in provide.go
+	outputMetrics := make(map[string]bool)
+	scanner := bufio.NewScanner(strings.NewReader(res.Stdout.String()))
+	// Only consider lines that start with exactly two spaces
+	indent := "  "
+	for scanner.Scan() {
+		line := scanner.Text()
+		if !strings.HasPrefix(line, indent) || strings.HasPrefix(line, indent+" ") {
+			continue
+		}
+
+		// Remove the indent
+		line = strings.TrimPrefix(line, indent)
+
+		// Extract metric name - everything before the first ':'
+		parts := strings.SplitN(line, ":", 2)
+		if len(parts) >= 1 {
+			metricName := strings.TrimSpace(parts[0])
+			if metricName != "" {
+				outputMetrics[metricName] = true
+			}
+		}
+	}
+	require.NoError(t, scanner.Err())
+
+	// Read docs/provide-stats.md
+	// Find the repo root by looking for go.mod
+	repoRoot := ".."
+ for range 6 { + if _, err := os.Stat(filepath.Join(repoRoot, "go.mod")); err == nil { + break + } + repoRoot = filepath.Join("..", repoRoot) + } + docsPath := filepath.Join(repoRoot, "docs", "provide-stats.md") + docsFile, err := os.Open(docsPath) + require.NoError(t, err, "Failed to open provide-stats.md") + defer docsFile.Close() + + // Parse all ### metric headers from the docs + documentedMetrics := make(map[string]bool) + docsScanner := bufio.NewScanner(docsFile) + for docsScanner.Scan() { + line := docsScanner.Text() + if metricName, found := strings.CutPrefix(line, "### "); found { + metricName = strings.TrimSpace(metricName) + documentedMetrics[metricName] = true + } + } + require.NoError(t, docsScanner.Err()) + + // Check that all output metrics are documented + var undocumentedMetrics []string + for metric := range outputMetrics { + if !documentedMetrics[metric] { + undocumentedMetrics = append(undocumentedMetrics, metric) + } + } + + require.Empty(t, undocumentedMetrics, + "The following metrics from 'ipfs provide stat --all' are not documented in docs/provide-stats.md: %v\n"+ + "All output metrics: %v\n"+ + "Documented metrics: %v", + undocumentedMetrics, outputMetrics, documentedMetrics) +} + +// TestProvideStatBasic tests basic functionality of ipfs provide stat +func TestProvideStatBasic(t *testing.T) { + t.Parallel() + + t.Run("works with Sweep provider and shows brief output", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.IPFS("provide", "stat") + require.NoError(t, res.Err) + assert.Empty(t, res.Stderr.String()) + + output := res.Stdout.String() + // Brief output should contain specific full labels + assert.Contains(t, output, "Provide queue:") + assert.Contains(t, output, "Reprovide queue:") + assert.Contains(t, output, "CIDs scheduled:") + assert.Contains(t, output, "Regions scheduled:") + assert.Contains(t, output, "Avg record holders:") + assert.Contains(t, output, "Ongoing provides:") + assert.Contains(t, output, "Ongoing reprovides:") + assert.Contains(t, output, "Total CIDs provided:") + }) + + t.Run("requires daemon to be online", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + + res := node.RunIPFS("provide", "stat") + assert.Error(t, res.Err) + assert.Contains(t, res.Stderr.String(), "this command must be run in online mode") + }) +} + +// TestProvideStatFlags tests various command flags +func TestProvideStatFlags(t *testing.T) { + t.Parallel() + + t.Run("--all flag shows all sections with headings", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.IPFS("provide", "stat", "--all") + require.NoError(t, res.Err) + + output := res.Stdout.String() + // Should contain section headings with colons + assert.Contains(t, output, "Connectivity:") + assert.Contains(t, output, "Queues:") + assert.Contains(t, output, "Schedule:") + assert.Contains(t, output, "Timings:") + assert.Contains(t, output, "Network:") + assert.Contains(t, output, "Operations:") + assert.Contains(t, output, "Workers:") + + // Should contain detailed metrics not in brief mode + assert.Contains(t, output, "Uptime:") + assert.Contains(t, output, "Cycle 
started:") + assert.Contains(t, output, "Reprovide interval:") + assert.Contains(t, output, "Peers swept:") + assert.Contains(t, output, "Full keyspace coverage:") + }) + + t.Run("--compact requires --all", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.RunIPFS("provide", "stat", "--compact") + assert.Error(t, res.Err) + assert.Contains(t, res.Stderr.String(), "--compact requires --all flag") + }) + + t.Run("--compact with --all shows 2-column layout", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.IPFS("provide", "stat", "--all", "--compact") + require.NoError(t, res.Err) + + output := res.Stdout.String() + lines := strings.Split(strings.TrimSpace(output), "\n") + require.NotEmpty(t, lines) + + // In compact mode, find a line that has both Schedule and Connectivity metrics + // This confirms 2-column layout is working + foundTwoColumns := false + for _, line := range lines { + if strings.Contains(line, "CIDs scheduled:") && strings.Contains(line, "Status:") { + foundTwoColumns = true + break + } + } + assert.True(t, foundTwoColumns, "Should have at least one line with both 'CIDs scheduled:' and 'Status:' confirming 2-column layout") + }) + + t.Run("individual section flags work with full labels", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + testCases := []struct { + flag string + contains []string + }{ + { + flag: "--connectivity", + contains: []string{"Status:"}, + }, + { + flag: "--queues", + contains: []string{"Provide queue:", "Reprovide queue:"}, + }, + { + flag: "--schedule", + contains: []string{"CIDs scheduled:", "Regions scheduled:", "Avg prefix length:", "Next region prefix:", "Next region reprovide:"}, + }, + { + flag: "--timings", + contains: []string{"Uptime:", "Current time offset:", "Cycle started:", "Reprovide interval:"}, + }, + { + flag: "--network", + contains: []string{"Avg record holders:", "Peers swept:", "Full keyspace coverage:", "Reachable peers:", "Avg region size:", "Replication factor:"}, + }, + { + flag: "--operations", + contains: []string{"Ongoing provides:", "Ongoing reprovides:", "Total CIDs provided:", "Total records provided:", "Total provide errors:"}, + }, + { + flag: "--workers", + contains: []string{"Active workers:", "Free workers:", "Workers stats:", "Periodic", "Burst"}, + }, + } + + for _, tc := range testCases { + res := node.IPFS("provide", "stat", tc.flag) + require.NoError(t, res.Err, "flag %s should work", tc.flag) + output := res.Stdout.String() + for _, expected := range tc.contains { + assert.Contains(t, output, expected, "flag %s should contain '%s'", tc.flag, expected) + } + } + }) + + t.Run("multiple section flags can be combined", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.IPFS("provide", "stat", "--network", "--operations") + 
require.NoError(t, res.Err) + + output := res.Stdout.String() + // Should have section headings when multiple flags combined + assert.Contains(t, output, "Network:") + assert.Contains(t, output, "Operations:") + assert.Contains(t, output, "Avg record holders:") + assert.Contains(t, output, "Ongoing provides:") + }) +} + +// TestProvideStatLegacyProvider tests Legacy provider specific behavior +func TestProvideStatLegacyProvider(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", false) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + t.Run("shows legacy stats from old provider system", func(t *testing.T) { + res := node.IPFS("provide", "stat") + require.NoError(t, res.Err) + + // Legacy provider shows stats from the old reprovider system + output := res.Stdout.String() + assert.Contains(t, output, "TotalReprovides:") + assert.Contains(t, output, "AvgReprovideDuration:") + assert.Contains(t, output, "LastReprovideDuration:") + }) + + t.Run("rejects flags with legacy provider", func(t *testing.T) { + flags := []string{"--all", "--connectivity", "--queues", "--network", "--workers"} + for _, flag := range flags { + res := node.RunIPFS("provide", "stat", flag) + assert.Error(t, res.Err, "flag %s should be rejected for legacy provider", flag) + assert.Contains(t, res.Stderr.String(), "cannot use flags with legacy provide stats") + } + }) + + t.Run("rejects --lan flag with legacy provider", func(t *testing.T) { + res := node.RunIPFS("provide", "stat", "--lan") + assert.Error(t, res.Err) + assert.Contains(t, res.Stderr.String(), "LAN stats only available for Sweep provider with Dual DHT") + }) +} + +// TestProvideStatOutputFormats tests different output formats +func TestProvideStatOutputFormats(t *testing.T) { + t.Parallel() + + t.Run("JSON output with Sweep provider", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.IPFS("provide", "stat", "--enc=json") + require.NoError(t, res.Err) + + // Parse JSON to verify structure + var result struct { + Sweep map[string]interface{} `json:"Sweep"` + Legacy map[string]interface{} `json:"Legacy"` + } + err := json.Unmarshal([]byte(res.Stdout.String()), &result) + require.NoError(t, err, "Output should be valid JSON") + assert.NotNil(t, result.Sweep, "Sweep stats should be present") + assert.Nil(t, result.Legacy, "Legacy stats should not be present") + }) + + t.Run("JSON output with Legacy provider", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", false) + node.SetIPFSConfig("Provide.Enabled", true) + node.StartDaemon() + defer node.StopDaemon() + + res := node.IPFS("provide", "stat", "--enc=json") + require.NoError(t, res.Err) + + // Parse JSON to verify structure + var result struct { + Sweep map[string]interface{} `json:"Sweep"` + Legacy map[string]interface{} `json:"Legacy"` + } + err := json.Unmarshal([]byte(res.Stdout.String()), &result) + require.NoError(t, err, "Output should be valid JSON") + assert.Nil(t, result.Sweep, "Sweep stats should not be present") + assert.NotNil(t, result.Legacy, "Legacy stats should be present") + }) +} + +// TestProvideStatIntegration tests integration with provide operations +func 
TestProvideStatIntegration(t *testing.T) { + t.Parallel() + + t.Run("stats reflect content being added to schedule", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.SetIPFSConfig("Provide.DHT.Interval", "1h") + node.StartDaemon() + defer node.StopDaemon() + + // Get initial scheduled CID count + res1 := node.IPFS("provide", "stat", "--enc=json") + require.NoError(t, res1.Err) + initialKeys := parseSweepStats(t, res1.Stdout.String()).Sweep.Schedule.Keys + + // Add content - this should increase CIDs scheduled + node.IPFSAddStr("test content for stats") + + // Wait for content to appear in schedule (with timeout) + // The buffered provider may take a moment to schedule items + require.Eventually(t, func() bool { + res := node.IPFS("provide", "stat", "--enc=json") + require.NoError(t, res.Err) + stats := parseSweepStats(t, res.Stdout.String()) + return stats.Sweep.Schedule.Keys > initialKeys + }, provideStatEventuallyTimeout, provideStatEventuallyTick, "Content should appear in schedule after adding") + }) + + t.Run("stats work with all documented strategies", func(t *testing.T) { + t.Parallel() + + // Test all strategies documented in docs/config.md#providestrategy + strategies := []string{"all", "pinned", "roots", "mfs", "pinned+mfs"} + for _, strategy := range strategies { + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.SetIPFSConfig("Provide.Strategy", strategy) + node.StartDaemon() + + res := node.IPFS("provide", "stat") + require.NoError(t, res.Err, "stats should work with strategy %s", strategy) + output := res.Stdout.String() + assert.NotEmpty(t, output) + assert.Contains(t, output, "CIDs scheduled:") + + node.StopDaemon() + } + }) +} + +// TestProvideStatDisabledConfig tests behavior when provide system is disabled +func TestProvideStatDisabledConfig(t *testing.T) { + t.Parallel() + + t.Run("Provide.Enabled=false returns error stats not available", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", false) + node.StartDaemon() + defer node.StopDaemon() + + res := node.RunIPFS("provide", "stat") + assert.Error(t, res.Err) + assert.Contains(t, res.Stderr.String(), "stats not available") + }) + + t.Run("Provide.Enabled=true with Provide.DHT.Interval=0 returns error stats not available", func(t *testing.T) { + t.Parallel() + + h := harness.NewT(t) + node := h.NewNode().Init() + node.SetIPFSConfig("Provide.DHT.SweepEnabled", true) + node.SetIPFSConfig("Provide.Enabled", true) + node.SetIPFSConfig("Provide.DHT.Interval", "0") + node.StartDaemon() + defer node.StopDaemon() + + res := node.RunIPFS("provide", "stat") + assert.Error(t, res.Err) + assert.Contains(t, res.Stderr.String(), "stats not available") + }) +} diff --git a/test/cli/provider_test.go b/test/cli/provider_test.go index debeddcd0..ccd164860 100644 --- a/test/cli/provider_test.go +++ b/test/cli/provider_test.go @@ -3,6 +3,7 @@ package cli import ( "bytes" "encoding/json" + "fmt" "net/http" "net/http/httptest" "strings" @@ -608,6 +609,124 @@ func runProviderSuite(t *testing.T, reprovide bool, apply cfgApplier) { }) } +// runResumeTests validates Provide.DHT.ResumeEnabled behavior for SweepingProvider. 
+// +// Background: The provider tracks current_time_offset = (now - cycleStart) % interval +// where cycleStart is the timestamp marking the beginning of the reprovide cycle. +// With ResumeEnabled=true, cycleStart persists in the datastore across restarts. +// With ResumeEnabled=false, cycleStart resets to 'now' on each startup. +func runResumeTests(t *testing.T, apply cfgApplier) { + t.Helper() + + const ( + reprovideInterval = 30 * time.Second + initialRuntime = 10 * time.Second // Let cycle progress + downtime = 5 * time.Second // Simulated offline period + restartTime = 2 * time.Second // Daemon restart stabilization + + // Thresholds account for timing jitter (~2-3s margin) + minOffsetBeforeRestart = 8 * time.Second // Expect ~10s + minOffsetAfterResume = 12 * time.Second // Expect ~17s (10s + 5s + 2s) + maxOffsetAfterReset = 5 * time.Second // Expect ~2s (fresh start) + ) + + setupNode := func(t *testing.T, resumeEnabled bool) *harness.Node { + node := harness.NewT(t).NewNode().Init() + apply(node) // Sets Provide.DHT.SweepEnabled=true + node.SetIPFSConfig("Provide.DHT.ResumeEnabled", resumeEnabled) + node.SetIPFSConfig("Provide.DHT.Interval", reprovideInterval.String()) + node.SetIPFSConfig("Bootstrap", []string{}) + node.StartDaemon() + return node + } + + t.Run("preserves cycle state across restart", func(t *testing.T) { + t.Parallel() + + node := setupNode(t, true) + defer node.StopDaemon() + + for i := 0; i < 10; i++ { + node.IPFSAddStr(fmt.Sprintf("resume-test-%d-%d", i, time.Now().UnixNano())) + } + + time.Sleep(initialRuntime) + + beforeRestart := node.IPFS("provide", "stat", "--enc=json") + offsetBeforeRestart, _, err := parseProvideStatJSON(beforeRestart.Stdout.String()) + require.NoError(t, err) + require.Greater(t, offsetBeforeRestart, minOffsetBeforeRestart, + "cycle should have progressed") + + node.StopDaemon() + time.Sleep(downtime) + node.StartDaemon() + time.Sleep(restartTime) + + afterRestart := node.IPFS("provide", "stat", "--enc=json") + offsetAfterRestart, _, err := parseProvideStatJSON(afterRestart.Stdout.String()) + require.NoError(t, err) + + assert.GreaterOrEqual(t, offsetAfterRestart, minOffsetAfterResume, + "offset should account for downtime") + }) + + t.Run("resets cycle when disabled", func(t *testing.T) { + t.Parallel() + + node := setupNode(t, false) + defer node.StopDaemon() + + for i := 0; i < 10; i++ { + node.IPFSAddStr(fmt.Sprintf("no-resume-%d-%d", i, time.Now().UnixNano())) + } + + time.Sleep(initialRuntime) + + beforeRestart := node.IPFS("provide", "stat", "--enc=json") + offsetBeforeRestart, _, err := parseProvideStatJSON(beforeRestart.Stdout.String()) + require.NoError(t, err) + require.Greater(t, offsetBeforeRestart, minOffsetBeforeRestart, + "cycle should have progressed") + + node.StopDaemon() + time.Sleep(downtime) + node.StartDaemon() + time.Sleep(restartTime) + + afterRestart := node.IPFS("provide", "stat", "--enc=json") + offsetAfterRestart, _, err := parseProvideStatJSON(afterRestart.Stdout.String()) + require.NoError(t, err) + + assert.Less(t, offsetAfterRestart, maxOffsetAfterReset, + "offset should reset to near zero") + }) +} + +type provideStatJSON struct { + Sweep struct { + Timing struct { + CurrentTimeOffset int64 `json:"current_time_offset"` // nanoseconds + } `json:"timing"` + Schedule struct { + NextReprovidePrefix string `json:"next_reprovide_prefix"` + } `json:"schedule"` + } `json:"Sweep"` +} + +// parseProvideStatJSON extracts timing and schedule information from +// the JSON output of 'ipfs provide stat --enc=json'. 
+// Note: prefix is unused in current tests but kept for potential future use. +func parseProvideStatJSON(output string) (offset time.Duration, prefix string, err error) { + var stat provideStatJSON + if err := json.Unmarshal([]byte(output), &stat); err != nil { + return 0, "", err + } + offset = time.Duration(stat.Sweep.Timing.CurrentTimeOffset) + prefix = stat.Sweep.Schedule.NextReprovidePrefix + return offset, prefix, nil +} + func TestProvider(t *testing.T) { t.Parallel() @@ -637,6 +756,11 @@ func TestProvider(t *testing.T) { t.Run(v.name, func(t *testing.T) { // t.Parallel() runProviderSuite(t, v.reprovide, v.apply) + + // Resume tests only apply to SweepingProvider + if v.name == "SweepingProvider" { + runResumeTests(t, v.apply) + } }) } } diff --git a/test/dependencies/go.mod b/test/dependencies/go.mod index 33d79dcd8..29ab89f02 100644 --- a/test/dependencies/go.mod +++ b/test/dependencies/go.mod @@ -65,7 +65,7 @@ require ( github.com/cockroachdb/crlib v0.0.0-20241112164430-1264a2edc35b // indirect github.com/cockroachdb/errors v1.11.3 // indirect github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect - github.com/cockroachdb/pebble/v2 v2.1.0 // indirect + github.com/cockroachdb/pebble/v2 v2.1.1 // indirect github.com/cockroachdb/redact v1.1.5 // indirect github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 // indirect github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect @@ -182,9 +182,9 @@ require ( github.com/libp2p/go-cidranger v1.1.0 // indirect github.com/libp2p/go-doh-resolver v0.5.0 // indirect github.com/libp2p/go-flow-metrics v0.3.0 // indirect - github.com/libp2p/go-libp2p v0.43.0 // indirect + github.com/libp2p/go-libp2p v0.44.0 // indirect github.com/libp2p/go-libp2p-asn-util v0.4.1 // indirect - github.com/libp2p/go-libp2p-kad-dht v0.35.1 // indirect + github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32 // indirect github.com/libp2p/go-libp2p-kbucket v0.8.0 // indirect github.com/libp2p/go-libp2p-record v0.3.1 // indirect github.com/libp2p/go-libp2p-routing-helpers v0.7.5 // indirect @@ -258,9 +258,7 @@ require ( github.com/quasilyte/gogrep v0.5.0 // indirect github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727 // indirect github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 // indirect - github.com/quic-go/qpack v0.5.1 // indirect github.com/quic-go/quic-go v0.55.0 // indirect - github.com/quic-go/webtransport-go v0.9.0 // indirect github.com/raeperd/recvcheck v0.2.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect diff --git a/test/dependencies/go.sum b/test/dependencies/go.sum index e13f04a4d..99b07b139 100644 --- a/test/dependencies/go.sum +++ b/test/dependencies/go.sum @@ -118,8 +118,8 @@ github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZe github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= -github.com/cockroachdb/pebble/v2 v2.1.0 h1:6KZvjSpWcEXZUvlLzTRC7T1A2G7r+bFskIzggklxixo= -github.com/cockroachdb/pebble/v2 v2.1.0/go.mod h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= +github.com/cockroachdb/pebble/v2 v2.1.1 h1:sUpUJjorLDSL4zIRFqoduCBaf2LewaMUXOoOpK+MrXQ= 
+github.com/cockroachdb/pebble/v2 v2.1.1/go.mod h1:Aza05DCCc05ghIJZkB4Q/axv/JK9wx5cFwWcnhG0eGw= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= github.com/cockroachdb/swiss v0.0.0-20250624142022-d6e517c1d961 h1:Nua446ru3juLHLZd4AwKNzClZgL1co3pUPGv3o8FlcA= @@ -458,12 +458,12 @@ github.com/libp2p/go-doh-resolver v0.5.0 h1:4h7plVVW+XTS+oUBw2+8KfoM1jF6w8XmO7+s github.com/libp2p/go-doh-resolver v0.5.0/go.mod h1:aPDxfiD2hNURgd13+hfo29z9IC22fv30ee5iM31RzxU= github.com/libp2p/go-flow-metrics v0.3.0 h1:q31zcHUvHnwDO0SHaukewPYgwOBSxtt830uJtUx6784= github.com/libp2p/go-flow-metrics v0.3.0/go.mod h1:nuhlreIwEguM1IvHAew3ij7A8BMlyHQJ279ao24eZZo= -github.com/libp2p/go-libp2p v0.43.0 h1:b2bg2cRNmY4HpLK8VHYQXLX2d3iND95OjodLFymvqXU= -github.com/libp2p/go-libp2p v0.43.0/go.mod h1:IiSqAXDyP2sWH+J2gs43pNmB/y4FOi2XQPbsb+8qvzc= +github.com/libp2p/go-libp2p v0.44.0 h1:5Gtt8OrF8yiXmH+Mx4+/iBeFRMK1TY3a8OrEBDEqAvs= +github.com/libp2p/go-libp2p v0.44.0/go.mod h1:NovCojezAt4dnDd4fH048K7PKEqH0UFYYqJRjIIu8zc= github.com/libp2p/go-libp2p-asn-util v0.4.1 h1:xqL7++IKD9TBFMgnLPZR6/6iYhawHKHl950SO9L6n94= github.com/libp2p/go-libp2p-asn-util v0.4.1/go.mod h1:d/NI6XZ9qxw67b4e+NgpQexCIiFYJjErASrYW4PFDN8= -github.com/libp2p/go-libp2p-kad-dht v0.35.1 h1:RQglhc9OxqDwlFFdhQMwKxIPBIBfGsleROnK5hqVsoE= -github.com/libp2p/go-libp2p-kad-dht v0.35.1/go.mod h1:1oCXzkkBiYh3d5cMWLpInSOZ6am2AlpC4G+GDcZFcE0= +github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32 h1:xZj18PsLD157snR/BFo547jwOkGDH7jZjMEkBDOoD4Q= +github.com/libp2p/go-libp2p-kad-dht v0.35.2-0.20251025120456-f33906fd2f32/go.mod h1:aHMTg23iseX9grGSfA5gFUzLrqzmYbA8PqgGPqM8VkI= github.com/libp2p/go-libp2p-kbucket v0.8.0 h1:QAK7RzKJpYe+EuSEATAaaHYMYLkPDGC18m9jxPLnU8s= github.com/libp2p/go-libp2p-kbucket v0.8.0/go.mod h1:JMlxqcEyKwO6ox716eyC0hmiduSWZZl6JY93mGaaqc4= github.com/libp2p/go-libp2p-record v0.3.1 h1:cly48Xi5GjNw5Wq+7gmjfBiG9HCzQVkiZOUZ8kUl+Fg= @@ -490,6 +490,8 @@ github.com/maratori/testableexamples v1.0.0 h1:dU5alXRrD8WKSjOUnmJZuzdxWOEQ57+7s github.com/maratori/testableexamples v1.0.0/go.mod h1:4rhjL1n20TUTT4vdh3RDqSizKLyXp7K2u6HgraZCGzE= github.com/maratori/testpackage v1.1.1 h1:S58XVV5AD7HADMmD0fNnziNHqKvSdDuEKdPD1rNTU04= github.com/maratori/testpackage v1.1.1/go.mod h1:s4gRK/ym6AMrqpOa/kEbQTV4Q4jb7WeLZzVhVVVOQMc= +github.com/marcopolo/simnet v0.0.1 h1:rSMslhPz6q9IvJeFWDoMGxMIrlsbXau3NkuIXHGJxfg= +github.com/marcopolo/simnet v0.0.1/go.mod h1:WDaQkgLAjqDUEBAOXz22+1j6wXKfGlC5sD5XWt3ddOs= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd h1:br0buuQ854V8u83wA0rVZ8ttrq5CpaPZdvrK0LP2lOk= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd/go.mod h1:QuCEs1Nt24+FYQEqAAncTDPJIuGs+LxK1MCiFL25pMU= github.com/matoous/godox v1.1.0 h1:W5mqwbyWrwZv6OQ5Z1a/DHGMOvXYCBP3+Ht7KMoJhq4= diff --git a/version.go b/version.go index 364e65c7b..9dac78644 100644 --- a/version.go +++ b/version.go @@ -11,7 +11,7 @@ import ( var CurrentCommit string // CurrentVersionNumber is the current application's version literal. -const CurrentVersionNumber = "0.38.2" +const CurrentVersionNumber = "0.39.0-dev" const ApiVersion = "/kubo/" + CurrentVersionNumber + "/" //nolint
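
For reference, a minimal runnable sketch (not part of the changeset above) of the cycle-offset arithmetic that the `runResumeTests` comments describe, `current_time_offset = (now - cycleStart) % interval`, using hypothetical values matching that test's constants:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical values mirroring runResumeTests: 30s reprovide interval,
	// 10s initial runtime, 5s downtime, 2s restart stabilization.
	interval := 30 * time.Second

	// ResumeEnabled=true: cycleStart is persisted across the restart, so the
	// offset keeps counting through the downtime (~17s = 10s + 5s + 2s).
	cycleStart := time.Now().Add(-17 * time.Second)
	fmt.Println("offset with resume:", time.Since(cycleStart)%interval)

	// ResumeEnabled=false: cycleStart is reset at daemon startup, so the
	// offset restarts near zero (~2s, just the restart stabilization time).
	resetStart := time.Now().Add(-2 * time.Second)
	fmt.Println("offset after reset:", time.Since(resetStart)%interval)
}
```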