From c12d24949c4acfcfe53a2984cf6d67968f609132 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 21 Aug 2025 14:44:19 +0200 Subject: [PATCH] feat: optimize docker builds (#10925) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(docker): optimize Dockerfile for faster builds - add BuildKit syntax directive for advanced caching features - implement cache mounts for Go modules and build cache - reduce layers by combining RUN commands (5→2 in final stage) - optimize apt-get with --no-install-recommends flag - use COPY --chmod to avoid separate permission fixing Performance improvements: - incremental builds after code changes: ~8.6x faster (1m51s → 13s) - go module/build cache persists between builds - reduced layer count improves cache efficiency * ci: optimize Docker builds with BuildKit caching - enable BuildKit with GitHub Actions cache backend - add Docker Hub registry cache for cross-workflow sharing - move Docker login earlier to enable registry cache writes - use dual cache strategy (gha + registry) for faster builds expected improvements: - PR builds can reuse main branch cache from Docker Hub - rebuild after code changes ~5-10x faster with persistent cache - cross-PR cache sharing reduces redundant builds --- .github/workflows/docker-build.yml | 21 ++++++-- .github/workflows/docker-image.yml | 49 ++++++++---------- Dockerfile | 80 +++++++++++++----------------- 3 files changed, 74 insertions(+), 76 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index bd1a5cf41..24ece3fa4 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -27,8 +27,21 @@ jobs: shell: bash steps: - uses: actions/checkout@v5 - - uses: actions/setup-go@v5 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image with BuildKit + uses: docker/build-push-action@v6 with: - go-version: 1.25.x - - run: docker build -t $IMAGE_NAME:$WIP_IMAGE_TAG . - - run: docker run --rm $IMAGE_NAME:$WIP_IMAGE_TAG --version + context: . + push: false + load: true + tags: ${{ env.IMAGE_NAME }}:${{ env.WIP_IMAGE_TAG }} + cache-from: | + type=gha + type=registry,ref=${{ env.IMAGE_NAME }}:buildcache + cache-to: type=gha,mode=max + + - name: Test Docker image + run: docker run --rm $IMAGE_NAME:$WIP_IMAGE_TAG --version diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 8aa0dd77b..6d89c2980 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -46,13 +46,11 @@ jobs: - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - - name: Cache Docker layers - uses: actions/cache@v4 + - name: Log in to Docker Hub + uses: docker/login-action@v3 with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- + username: ${{ vars.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} - name: Get tags id: tags @@ -63,12 +61,6 @@ jobs: echo "EOF" >> $GITHUB_OUTPUT shell: bash - - name: Log in to Docker Hub - uses: docker/login-action@v3 - with: - username: ${{ vars.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - # We have to build each platform separately because when using multi-arch # builds, only one platform is being loaded into the cache. This would # prevent us from testing the other platforms. @@ -81,8 +73,10 @@ jobs: load: true file: ./Dockerfile tags: ${{ env.IMAGE_NAME }}:linux-amd64 - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new + cache-from: | + type=gha + type=registry,ref=${{ env.IMAGE_NAME }}:buildcache + cache-to: type=gha,mode=max - name: Build Docker image (linux/arm/v7) uses: docker/build-push-action@v6 @@ -93,8 +87,10 @@ jobs: load: true file: ./Dockerfile tags: ${{ env.IMAGE_NAME }}:linux-arm-v7 - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new + cache-from: | + type=gha + type=registry,ref=${{ env.IMAGE_NAME }}:buildcache + cache-to: type=gha,mode=max - name: Build Docker image (linux/arm64/v8) uses: docker/build-push-action@v6 @@ -105,8 +101,10 @@ jobs: load: true file: ./Dockerfile tags: ${{ env.IMAGE_NAME }}:linux-arm64-v8 - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new + cache-from: | + type=gha + type=registry,ref=${{ env.IMAGE_NAME }}:buildcache + cache-to: type=gha,mode=max # We test all the images on amd64 host here. This uses QEMU to emulate # the other platforms. @@ -132,12 +130,9 @@ jobs: push: true file: ./Dockerfile tags: "${{ github.event.inputs.tags || steps.tags.outputs.value }}" - cache-from: type=local,src=/tmp/.buildx-cache-new - cache-to: type=local,dest=/tmp/.buildx-cache-new - - # https://github.com/docker/build-push-action/issues/252 - # https://github.com/moby/buildkit/issues/1896 - - name: Move cache to limit growth - run: | - rm -rf /tmp/.buildx-cache - mv /tmp/.buildx-cache-new /tmp/.buildx-cache + cache-from: | + type=gha + type=registry,ref=${{ env.IMAGE_NAME }}:buildcache + cache-to: | + type=gha,mode=max + type=registry,ref=${{ env.IMAGE_NAME }}:buildcache,mode=max diff --git a/Dockerfile b/Dockerfile index de66c7867..0db5f33b4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,15 @@ +# syntax=docker/dockerfile:1 +# Enables BuildKit with cache mounts for faster builds FROM --platform=${BUILDPLATFORM:-linux/amd64} golang:1.25 AS builder ARG TARGETOS TARGETARCH ENV SRC_DIR=/kubo -# Download packages first so they can be cached. +# Cache go module downloads between builds for faster rebuilds COPY go.mod go.sum $SRC_DIR/ -RUN cd $SRC_DIR \ +RUN --mount=type=cache,target=/go/pkg/mod \ + cd $SRC_DIR \ && go mod download COPY . $SRC_DIR @@ -18,92 +21,79 @@ ARG IPFS_PLUGINS # Allow for other targets to be built, e.g.: docker build --build-arg MAKE_TARGET="nofuse" ARG MAKE_TARGET=build -# Build the thing. -# Also: fix getting HEAD commit hash via git rev-parse. -RUN cd $SRC_DIR \ +# Build ipfs binary with cached go modules and build cache. +# mkdir .git/objects allows git rev-parse to read commit hash for version info +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + cd $SRC_DIR \ && mkdir -p .git/objects \ && GOOS=$TARGETOS GOARCH=$TARGETARCH GOFLAGS=-buildvcs=false make ${MAKE_TARGET} IPFS_PLUGINS=$IPFS_PLUGINS -# Using Debian Buster because the version of busybox we're using is based on it -# and we want to make sure the libraries we're using are compatible. That's also -# why we're running this for the target platform. +# Extract required runtime tools from Debian. +# We use Debian instead of Alpine because we need glibc compatibility +# for the busybox base image we're using. FROM debian:bookworm-slim AS utilities RUN set -eux; \ apt-get update; \ - apt-get install -y \ + apt-get install -y --no-install-recommends \ tini \ # Using gosu (~2MB) instead of su-exec (~20KB) because it's easier to # install on Debian. Useful links: # - https://github.com/ncopa/su-exec#why-reinvent-gosu # - https://github.com/tianon/gosu/issues/52#issuecomment-441946745 gosu \ - # This installs fusermount which we later copy over to the target image. + # fusermount enables IPFS mount commands fuse \ ca-certificates \ ; \ - rm -rf /var/lib/apt/lists/* + apt-get clean; \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -# Now comes the actual target image, which aims to be as small as possible. +# Final minimal image with shell for debugging (busybox provides sh) FROM busybox:stable-glibc -# Get the ipfs binary, entrypoint script, and TLS CAs from the build container. +# Copy ipfs binary, startup scripts, and runtime dependencies ENV SRC_DIR=/kubo COPY --from=utilities /usr/sbin/gosu /sbin/gosu COPY --from=utilities /usr/bin/tini /sbin/tini COPY --from=utilities /bin/fusermount /usr/local/bin/fusermount COPY --from=utilities /etc/ssl/certs /etc/ssl/certs COPY --from=builder $SRC_DIR/cmd/ipfs/ipfs /usr/local/bin/ipfs -COPY --from=builder $SRC_DIR/bin/container_daemon /usr/local/bin/start_ipfs +COPY --from=builder --chmod=755 $SRC_DIR/bin/container_daemon /usr/local/bin/start_ipfs COPY --from=builder $SRC_DIR/bin/container_init_run /usr/local/bin/container_init_run -# Add suid bit on fusermount so it will run properly +# Set SUID for fusermount to enable FUSE mounting by non-root user RUN chmod 4755 /usr/local/bin/fusermount -# Fix permissions on start_ipfs (ignore the build machine's permissions) -RUN chmod 0755 /usr/local/bin/start_ipfs - -# Swarm TCP; should be exposed to the public -EXPOSE 4001 -# Swarm UDP; should be exposed to the public -EXPOSE 4001/udp -# Daemon API; must not be exposed publicly but to client services under you control +# Swarm P2P port (TCP/UDP) - expose publicly for peer connections +EXPOSE 4001 4001/udp +# API port - keep private, only for trusted clients EXPOSE 5001 -# Web Gateway; can be exposed publicly with a proxy, e.g. as https://ipfs.example.org +# Gateway port - can be exposed publicly via reverse proxy EXPOSE 8080 -# Swarm Websockets; must be exposed publicly when the node is listening using the websocket transport (/ipX/.../tcp/8081/ws). +# Swarm WebSockets - expose publicly for browser-based peers EXPOSE 8081 -# Create the fs-repo directory and switch to a non-privileged user. +# Create ipfs user (uid 1000) and required directories with proper ownership ENV IPFS_PATH=/data/ipfs -RUN mkdir -p $IPFS_PATH \ +RUN mkdir -p $IPFS_PATH /ipfs /ipns /mfs /container-init.d \ && adduser -D -h $IPFS_PATH -u 1000 -G users ipfs \ - && chown ipfs:users $IPFS_PATH + && chown ipfs:users $IPFS_PATH /ipfs /ipns /mfs /container-init.d -# Create mount points for `ipfs mount` command -RUN mkdir /ipfs /ipns /mfs \ - && chown ipfs:users /ipfs /ipns /mfs - -# Create the init scripts directory -RUN mkdir /container-init.d \ - && chown ipfs:users /container-init.d - -# Expose the fs-repo as a volume. -# start_ipfs initializes an fs-repo if none is mounted. -# Important this happens after the USER directive so permissions are correct. +# Volume for IPFS repository data persistence VOLUME $IPFS_PATH # The default logging level ENV GOLOG_LOG_LEVEL="" -# This just makes sure that: -# 1. There's an fs-repo, and initializes one if there isn't. -# 2. The API and Gateway are accessible from outside the container. +# Entrypoint initializes IPFS repo if needed and configures networking. +# tini ensures proper signal handling and zombie process cleanup ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/start_ipfs"] -# Healthcheck for the container -# QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn is the CID of empty folder +# Health check verifies IPFS daemon is responsive. +# Uses empty directory CID (QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn) as test HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ CMD ipfs --api=/ip4/127.0.0.1/tcp/5001 dag stat /ipfs/QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn || exit 1 -# Execute the daemon subcommand by default +# Default: run IPFS daemon with auto-migration enabled CMD ["daemon", "--migrate=true", "--agent-version-suffix=docker"]