feat: optimize docker builds (#10925)

* feat(docker): optimize Dockerfile for faster builds

  - add BuildKit syntax directive for advanced caching features
  - implement cache mounts for Go modules and build cache
  - reduce layers by combining RUN commands (5→2 in final stage)
  - optimize apt-get with --no-install-recommends flag
  - use COPY --chmod to avoid separate permission fixing

  Performance improvements:
  - incremental builds after code changes: ~8.6x faster (1m51s → 13s)
  - go module/build cache persists between builds
  - reduced layer count improves cache efficiency

* ci: optimize Docker builds with BuildKit caching

  - enable BuildKit with GitHub Actions cache backend
  - add Docker Hub registry cache for cross-workflow sharing
  - move Docker login earlier to enable registry cache writes
  - use dual cache strategy (gha + registry) for faster builds

  expected improvements:
  - PR builds can reuse main branch cache from Docker Hub
  - rebuild after code changes ~5-10x faster with persistent cache
  - cross-PR cache sharing reduces redundant builds
2026-02-21 10:27:46 +08:00 · 2025-08-21 14:44:19 +02:00 · 2025-08-21 14:44:19 +02:00 · c12d24949c
commit c12d24949c
parent ccb49de852
3 changed files with 74 additions and 76 deletions
--- a/.github/workflows/docker-build.yml
+++ b/.github/workflows/docker-build.yml
@@ -27,8 +27,21 @@ jobs:
        shell: bash
    steps:
      - uses: actions/checkout@v5
-      - uses: actions/setup-go@v5
+      
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      
+      - name: Build Docker image with BuildKit
+        uses: docker/build-push-action@v6
        with:
-          go-version: 1.25.x
-      - run: docker build -t $IMAGE_NAME:$WIP_IMAGE_TAG .
-      - run: docker run --rm $IMAGE_NAME:$WIP_IMAGE_TAG --version
+          context: .
+          push: false
+          load: true
+          tags: ${{ env.IMAGE_NAME }}:${{ env.WIP_IMAGE_TAG }}
+          cache-from: |
+            type=gha
+            type=registry,ref=${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=gha,mode=max
+      
+      - name: Test Docker image
+        run: docker run --rm $IMAGE_NAME:$WIP_IMAGE_TAG --version
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -46,13 +46,11 @@ jobs:
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

-      - name: Cache Docker layers
-        uses: actions/cache@v4
+      - name: Log in to Docker Hub
+        uses: docker/login-action@v3
        with:
-          path: /tmp/.buildx-cache
-          key: ${{ runner.os }}-buildx-${{ github.sha }}
-          restore-keys: |
-            ${{ runner.os }}-buildx-
+          username: ${{ vars.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Get tags
        id: tags
@@ -63,12 +61,6 @@ jobs:
          echo "EOF" >> $GITHUB_OUTPUT
        shell: bash

-      - name: Log in to Docker Hub
-        uses: docker/login-action@v3
-        with:
-          username: ${{ vars.DOCKER_USERNAME }}
-          password: ${{ secrets.DOCKER_PASSWORD }}
-
      # We have to build each platform separately because when using multi-arch
      # builds, only one platform is being loaded into the cache. This would
      # prevent us from testing the other platforms.
@@ -81,8 +73,10 @@ jobs:
          load: true
          file: ./Dockerfile
          tags: ${{ env.IMAGE_NAME }}:linux-amd64
-          cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache-new
+          cache-from: |
+            type=gha
+            type=registry,ref=${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=gha,mode=max

      - name: Build Docker image (linux/arm/v7)
        uses: docker/build-push-action@v6
@@ -93,8 +87,10 @@ jobs:
          load: true
          file: ./Dockerfile
          tags: ${{ env.IMAGE_NAME }}:linux-arm-v7
-          cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache-new
+          cache-from: |
+            type=gha
+            type=registry,ref=${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=gha,mode=max

      - name: Build Docker image (linux/arm64/v8)
        uses: docker/build-push-action@v6
@@ -105,8 +101,10 @@ jobs:
          load: true
          file: ./Dockerfile
          tags: ${{ env.IMAGE_NAME }}:linux-arm64-v8
-          cache-from: type=local,src=/tmp/.buildx-cache
-          cache-to: type=local,dest=/tmp/.buildx-cache-new
+          cache-from: |
+            type=gha
+            type=registry,ref=${{ env.IMAGE_NAME }}:buildcache
+          cache-to: type=gha,mode=max

      # We test all the images on amd64 host here. This uses QEMU to emulate
      # the other platforms.
@@ -132,12 +130,9 @@ jobs:
          push: true
          file: ./Dockerfile
          tags: "${{ github.event.inputs.tags || steps.tags.outputs.value }}"
-          cache-from: type=local,src=/tmp/.buildx-cache-new
-          cache-to: type=local,dest=/tmp/.buildx-cache-new
-
-      # https://github.com/docker/build-push-action/issues/252
-      # https://github.com/moby/buildkit/issues/1896
-      - name: Move cache to limit growth
-        run: |
-          rm -rf /tmp/.buildx-cache
-          mv /tmp/.buildx-cache-new /tmp/.buildx-cache
+          cache-from: |
+            type=gha
+            type=registry,ref=${{ env.IMAGE_NAME }}:buildcache
+          cache-to: |
+            type=gha,mode=max
+            type=registry,ref=${{ env.IMAGE_NAME }}:buildcache,mode=max
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,12 +1,15 @@
+# syntax=docker/dockerfile:1
+# Enables BuildKit with cache mounts for faster builds
 FROM --platform=${BUILDPLATFORM:-linux/amd64} golang:1.25 AS builder

 ARG TARGETOS TARGETARCH

 ENV SRC_DIR=/kubo

-# Download packages first so they can be cached.
+# Cache go module downloads between builds for faster rebuilds
 COPY go.mod go.sum $SRC_DIR/
-RUN cd $SRC_DIR \
+RUN --mount=type=cache,target=/go/pkg/mod \
+  cd $SRC_DIR \
  && go mod download

 COPY . $SRC_DIR
@@ -18,92 +21,79 @@ ARG IPFS_PLUGINS
 # Allow for other targets to be built, e.g.: docker build --build-arg MAKE_TARGET="nofuse"
 ARG MAKE_TARGET=build

-# Build the thing.
-# Also: fix getting HEAD commit hash via git rev-parse.
-RUN cd $SRC_DIR \
+# Build ipfs binary with cached go modules and build cache.
+# mkdir .git/objects allows git rev-parse to read commit hash for version info
+RUN --mount=type=cache,target=/go/pkg/mod \
+  --mount=type=cache,target=/root/.cache/go-build \
+  cd $SRC_DIR \
  && mkdir -p .git/objects \
  && GOOS=$TARGETOS GOARCH=$TARGETARCH GOFLAGS=-buildvcs=false make ${MAKE_TARGET} IPFS_PLUGINS=$IPFS_PLUGINS

-# Using Debian Buster because the version of busybox we're using is based on it
-# and we want to make sure the libraries we're using are compatible. That's also
-# why we're running this for the target platform.
+# Extract required runtime tools from Debian.
+# We use Debian instead of Alpine because we need glibc compatibility
+# for the busybox base image we're using.
 FROM debian:bookworm-slim AS utilities
 RUN set -eux; \
 	apt-get update; \
-	apt-get install -y \
+	apt-get install -y --no-install-recommends \
 		tini \
    # Using gosu (~2MB) instead of su-exec (~20KB) because it's easier to
    # install on Debian. Useful links:
    # - https://github.com/ncopa/su-exec#why-reinvent-gosu
    # - https://github.com/tianon/gosu/issues/52#issuecomment-441946745
 		gosu \
-    # This installs fusermount which we later copy over to the target image.
+    # fusermount enables IPFS mount commands
    fuse \
    ca-certificates \
 	; \
-	rm -rf /var/lib/apt/lists/*
+	apt-get clean; \
+	rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

-# Now comes the actual target image, which aims to be as small as possible.
+# Final minimal image with shell for debugging (busybox provides sh)
 FROM busybox:stable-glibc

-# Get the ipfs binary, entrypoint script, and TLS CAs from the build container.
+# Copy ipfs binary, startup scripts, and runtime dependencies
 ENV SRC_DIR=/kubo
 COPY --from=utilities /usr/sbin/gosu /sbin/gosu
 COPY --from=utilities /usr/bin/tini /sbin/tini
 COPY --from=utilities /bin/fusermount /usr/local/bin/fusermount
 COPY --from=utilities /etc/ssl/certs /etc/ssl/certs
 COPY --from=builder $SRC_DIR/cmd/ipfs/ipfs /usr/local/bin/ipfs
-COPY --from=builder $SRC_DIR/bin/container_daemon /usr/local/bin/start_ipfs
+COPY --from=builder --chmod=755 $SRC_DIR/bin/container_daemon /usr/local/bin/start_ipfs
 COPY --from=builder $SRC_DIR/bin/container_init_run /usr/local/bin/container_init_run

-# Add suid bit on fusermount so it will run properly
+# Set SUID for fusermount to enable FUSE mounting by non-root user
 RUN chmod 4755 /usr/local/bin/fusermount

-# Fix permissions on start_ipfs (ignore the build machine's permissions)
-RUN chmod 0755 /usr/local/bin/start_ipfs
-
-# Swarm TCP; should be exposed to the public
-EXPOSE 4001
-# Swarm UDP; should be exposed to the public
-EXPOSE 4001/udp
-# Daemon API; must not be exposed publicly but to client services under you control
+# Swarm P2P port (TCP/UDP) - expose publicly for peer connections
+EXPOSE 4001 4001/udp
+# API port - keep private, only for trusted clients
 EXPOSE 5001
-# Web Gateway; can be exposed publicly with a proxy, e.g. as https://ipfs.example.org
+# Gateway port - can be exposed publicly via reverse proxy
 EXPOSE 8080
-# Swarm Websockets; must be exposed publicly when the node is listening using the websocket transport (/ipX/.../tcp/8081/ws).
+# Swarm WebSockets - expose publicly for browser-based peers
 EXPOSE 8081

-# Create the fs-repo directory and switch to a non-privileged user.
+# Create ipfs user (uid 1000) and required directories with proper ownership
 ENV IPFS_PATH=/data/ipfs
-RUN mkdir -p $IPFS_PATH \
+RUN mkdir -p $IPFS_PATH /ipfs /ipns /mfs /container-init.d \
  && adduser -D -h $IPFS_PATH -u 1000 -G users ipfs \
-  && chown ipfs:users $IPFS_PATH
+  && chown ipfs:users $IPFS_PATH /ipfs /ipns /mfs /container-init.d

-# Create mount points for `ipfs mount` command
-RUN mkdir /ipfs /ipns /mfs \
-  && chown ipfs:users /ipfs /ipns /mfs
-
-# Create the init scripts directory
-RUN mkdir /container-init.d \
-  && chown ipfs:users /container-init.d
-
-# Expose the fs-repo as a volume.
-# start_ipfs initializes an fs-repo if none is mounted.
-# Important this happens after the USER directive so permissions are correct.
+# Volume for IPFS repository data persistence
 VOLUME $IPFS_PATH

 # The default logging level
 ENV GOLOG_LOG_LEVEL=""

-# This just makes sure that:
-# 1. There's an fs-repo, and initializes one if there isn't.
-# 2. The API and Gateway are accessible from outside the container.
+# Entrypoint initializes IPFS repo if needed and configures networking.
+# tini ensures proper signal handling and zombie process cleanup
 ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/start_ipfs"]

-# Healthcheck for the container
-# QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn is the CID of empty folder
+# Health check verifies IPFS daemon is responsive.
+# Uses empty directory CID (QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn) as test
 HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD ipfs --api=/ip4/127.0.0.1/tcp/5001 dag stat /ipfs/QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn || exit 1

-# Execute the daemon subcommand by default
+# Default: run IPFS daemon with auto-migration enabled
 CMD ["daemon", "--migrate=true", "--agent-version-suffix=docker"]