diff --git a/core/corehttp/gateway_handler.go b/core/corehttp/gateway_handler.go index 1262101be..d6e45ba92 100644 --- a/core/corehttp/gateway_handler.go +++ b/core/corehttp/gateway_handler.go @@ -327,6 +327,13 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request w.Header().Set("X-IPFS-Path", urlPath) w.Header().Set("Etag", responseEtag) + if rootCids, err := i.buildIpfsRootsHeader(urlPath, r); err == nil { + w.Header().Set("X-Ipfs-Roots", rootCids) + } else { // this should never happen, as we resolved the urlPath already + webError(w, "error while resolving X-Ipfs-Roots", err, http.StatusInternalServerError) + return + } + // set these headers _after_ the error, for we may just not have it // and don't want the client to cache a 500 response... // and only if it's /ipfs! @@ -391,6 +398,9 @@ func (i *gatewayHandler) getOrHeadHandler(w http.ResponseWriter, r *http.Request internalWebError(w, files.ErrNotReader) return } + // static index.html → no need to generate dynamic dir-index-html + // replace mutable DirIndex Etag with immutable dir CID + w.Header().Set("Etag", `"`+resolvedPath.Cid().String()+`"`) logger.Debugw("serving index.html file", "path", idxPath) // write to request @@ -785,6 +795,50 @@ func (i *gatewayHandler) addUserHeaders(w http.ResponseWriter) { } } +// Set X-Ipfs-Roots with logical CID array for efficient HTTP cache invalidation. +func (i *gatewayHandler) buildIpfsRootsHeader(contentPath string, r *http.Request) (string, error) { + /* + These are logical roots where each CID represents one path segment + and resolves to either a directory or the root block of a file. + The main purpose of this header is to allow HTTP caches to make smarter decisions + around cache invalidation (eg. 
keep specific subdirectory/file if it did not change) + + A good example is Wikipedia, which is HAMT-sharded, but we only care about + logical roots that represent each segment of the human-readable content + path: + + Given contentPath = /ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey + rootCidList is generated by doing `ipfs resolve -r` on each sub path: + /ipns/en.wikipedia-on-ipfs.org → bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze + /ipns/en.wikipedia-on-ipfs.org/wiki/ → bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4 + /ipns/en.wikipedia-on-ipfs.org/wiki/Block_of_Wikipedia_in_Turkey → bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma + + The result is an ordered array of values: + X-Ipfs-Roots: bafybeiaysi4s6lnjev27ln5icwm6tueaw2vdykrtjkwiphwekaywqhcjze,bafybeihn2f7lhumh4grizksi2fl233cyszqadkn424ptjajfenykpsaiw4,bafkreibn6euazfvoghepcm4efzqx5l3hieof2frhp254hio5y7n3hv5rma + + Note that while the top one will change every time any article is changed, + the last root (responsible for the specific article) may not change at all. 
+ */ + var sp strings.Builder + var pathRoots []string + pathSegments := strings.Split(contentPath[6:], "/") + sp.WriteString(contentPath[:5]) // /ipfs or /ipns + for _, root := range pathSegments { + if root == "" { + continue + } + sp.WriteString("/") + sp.WriteString(root) + resolvedSubPath, err := i.api.ResolvePath(r.Context(), ipath.New(sp.String())) + if err != nil { + return "", err + } + pathRoots = append(pathRoots, resolvedSubPath.Cid().String()) + } + rootCidList := strings.Join(pathRoots, ",") // convention from rfc2616#sec4.2 + return rootCidList, nil +} + func webError(w http.ResponseWriter, message string, err error, defaultCode int) { if _, ok := err.(resolver.ErrNoLink); ok { webErrorWithCode(w, message, err, http.StatusNotFound) diff --git a/test/sharness/t0116-gateway-cache.sh b/test/sharness/t0116-gateway-cache.sh new file mode 100755 index 000000000..3e9e2af48 --- /dev/null +++ b/test/sharness/t0116-gateway-cache.sh @@ -0,0 +1,150 @@ +#!/usr/bin/env bash + +test_description="Test HTTP Gateway Cache Control Support" + +. lib/test-lib.sh + +test_init_ipfs +test_launch_ipfs_daemon_without_network + +# Cache control support is based on logical roots (each path segment == one logical root). 
+# To maximize the test surface, we want to test: +# - /ipfs/ content path +# - /ipns/ content path +# - at least 3 levels +# - separate tests for a directory listing and a file +# - have implicit index.html for a good measure +# /ipns/root1/root2/root3/ (/ipns/root1/root2/root3/index.html) + +# Note: we cover important edge case here: +# ROOT3_CID - dir listing (dir-index-html response) +# ROOT4_CID - index.html returned as a root response (dir/), instead of generated dir-index-html +# FILE_CID - index.html returned directly, as a file + +test_expect_success "Add the test directory" ' + mkdir -p root2/root3/root4 && + echo "hello" > root2/root3/root4/index.html && + ROOT1_CID=$(ipfs add -Qrw --cid-version 1 root2) + ROOT2_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2 | cut -d "/" -f3) + ROOT3_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3 | cut -d "/" -f3) + ROOT4_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3/root4 | cut -d "/" -f3) + FILE_CID=$(ipfs resolve -r /ipfs/$ROOT1_CID/root2/root3/root4/index.html | cut -d "/" -f3) +' + +test_expect_success "Prepare IPNS unixfs content path for testing" ' + TEST_IPNS_ID=$(ipfs key gen --ipns-base=base36 --type=ed25519 cache_test_key | head -n1 | tr -d "\n") + ipfs name publish --key cache_test_key --allow-offline -Q "/ipfs/$ROOT1_CID" > name_publish_out && + test_check_peerid "${TEST_IPNS_ID}" && + ipfs name resolve "${TEST_IPNS_ID}" > output && + printf "/ipfs/%s\n" "$ROOT1_CID" > expected && + test_cmp expected output +' + +# GET /ipfs/ + test_expect_success "GET for /ipfs/ unixfs dir listing succeeds" ' + curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/" >/dev/null 2>curl_ipfs_dir_listing_output && + cat curl_ipfs_dir_listing_output + ' + test_expect_success "GET for /ipfs/ unixfs dir with index.html succeeds" ' + curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/" >/dev/null 2>curl_ipfs_dir_index.html_output && + cat curl_ipfs_dir_index.html_output + ' + 
test_expect_success "GET for /ipfs/ unixfs file succeeds" ' + curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipfs/$ROOT1_CID/root2/root3/root4/index.html" >/dev/null 2>curl_ipfs_file_output && + cat curl_ipfs_file_output + ' +# GET /ipns/ + test_expect_success "GET for /ipns/ unixfs dir listing succeeds" ' + curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/" >/dev/null 2>curl_ipns_dir_listing_output && + cat curl_ipns_dir_listing_output + ' + test_expect_success "GET for /ipns/ unixfs dir with index.html succeeds" ' + curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/" >/dev/null 2>curl_ipns_dir_index.html_output && + cat curl_ipns_dir_index.html_output + ' + test_expect_success "GET for /ipns/ unixfs file succeeds" ' + curl -svX GET "http://127.0.0.1:$GWAY_PORT/ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" >/dev/null 2>curl_ipns_file_output && + cat curl_ipns_file_output + ' + +# X-Ipfs-Path + + ## dir generated listing + test_expect_success "GET /ipfs/ dir listing response has original content path in X-Ipfs-Path" ' + grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3" curl_ipfs_dir_listing_output + ' + test_expect_success "GET /ipns/ dir listing response has original content path in X-Ipfs-Path" ' + grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3" curl_ipns_dir_listing_output + ' + + ## dir static index.html + test_expect_success "GET /ipfs/ dir index.html response has original content path in X-Ipfs-Path" ' + grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3/root4/" curl_ipfs_dir_index.html_output + ' + test_expect_success "GET /ipns/ dir index.html response has original content path in X-Ipfs-Path" ' + grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3/root4/" curl_ipns_dir_index.html_output + ' + + # file + test_expect_success "GET /ipfs/ file response has original content path in X-Ipfs-Path" ' + grep "< X-Ipfs-Path: /ipfs/$ROOT1_CID/root2/root3/root4/index.html" curl_ipfs_file_output + ' + 
test_expect_success "GET /ipns/ file response has original content path in X-Ipfs-Path" ' + grep "< X-Ipfs-Path: /ipns/$TEST_IPNS_ID/root2/root3/root4/index.html" curl_ipns_file_output + ' + +# X-Ipfs-Roots + + ## dir generated listing + test_expect_success "GET /ipfs/ dir listing response has logical CID roots in X-Ipfs-Roots" ' + grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID}" curl_ipfs_dir_listing_output + ' + test_expect_success "GET /ipns/ dir listing response has logical CID roots in X-Ipfs-Roots" ' + grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID}" curl_ipns_dir_listing_output + ' + + ## dir static index.html + test_expect_success "GET /ipfs/ dir index.html response has logical CID roots in X-Ipfs-Roots" ' + grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID}" curl_ipfs_dir_index.html_output + ' + test_expect_success "GET /ipns/ dir index.html response has logical CID roots in X-Ipfs-Roots" ' + grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID}" curl_ipns_dir_index.html_output + ' + + ## file + test_expect_success "GET /ipfs/ file response has logical CID roots in X-Ipfs-Roots" ' + grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID},${FILE_CID}" curl_ipfs_file_output + ' + test_expect_success "GET /ipns/ file response has logical CID roots in X-Ipfs-Roots" ' + grep "< X-Ipfs-Roots: ${ROOT1_CID},${ROOT2_CID},${ROOT3_CID},${ROOT4_CID},${FILE_CID}" curl_ipns_file_output + ' + +# Etag + + ## dir generated listing + test_expect_success "GET /ipfs/ dir response has special Etag for generated dir listing" ' + grep -E "< Etag: \"DirIndex-.+_CID-${ROOT3_CID}\"" curl_ipfs_dir_listing_output + ' + test_expect_success "GET /ipns/ dir response has special Etag for generated dir listing" ' + grep -E "< Etag: \"DirIndex-.+_CID-${ROOT3_CID}\"" curl_ipns_dir_listing_output + ' + + ## dir static index.html should use the immutable CID of the directory (not the mutable DirIndex Etag) for improved HTTP caching + 
test_expect_success "GET /ipfs/ dir index.html response has dir CID as Etag" ' + grep "< Etag: \"${ROOT4_CID}\"" curl_ipfs_dir_index.html_output + ' + test_expect_success "GET /ipns/ dir index.html response has dir CID as Etag" ' + grep "< Etag: \"${ROOT4_CID}\"" curl_ipns_dir_index.html_output + ' + + ## file + test_expect_success "GET /ipfs/ response has CID as Etag for a file" ' + grep "< Etag: \"${FILE_CID}\"" curl_ipfs_file_output + ' + test_expect_success "GET /ipns/ response has CID as Etag for a file" ' + grep "< Etag: \"${FILE_CID}\"" curl_ipns_file_output + ' + +test_kill_ipfs_daemon + +test_done diff --git a/test/sharness/t0116-prometheus.sh b/test/sharness/t0119-prometheus.sh similarity index 100% rename from test/sharness/t0116-prometheus.sh rename to test/sharness/t0119-prometheus.sh