From c95f5c9fce3651e308ece121f8603e820a611d6b Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 19 Jul 2022 01:32:11 +0200 Subject: [PATCH 1/2] test(gw): index.html with go-get=1 This adds a missing test for https://github.com/ipfs/kubo/pull/3963 --- test/sharness/t0110-gateway.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/sharness/t0110-gateway.sh b/test/sharness/t0110-gateway.sh index 112203478..630e84648 100755 --- a/test/sharness/t0110-gateway.sh +++ b/test/sharness/t0110-gateway.sh @@ -75,6 +75,13 @@ test_expect_success "GET IPFS directory with index.html returns redirect to add test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash " +# This enables go get to parse go-import meta tags from index.html files stored in IPFS +# https://github.com/ipfs/kubo/pull/3963 +test_expect_success "GET IPFS directory with index.html and no trailing slash returns expected output when go-get is passed" " + curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?go-get=1\" && + test_should_contain \"hello i am a webpage\" response_with_slash +" + test_expect_success "GET IPFS directory with index.html and trailing slash returns expected output" " curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex/?query=to-remember\" && test_should_contain \"hello i am a webpage\" response_with_slash From 3182986151d7ccaa110dd894d6fcba0ab77382f3 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Tue, 19 Jul 2022 01:17:37 +0200 Subject: [PATCH 2/2] fix(gw): ensure dir URLs have trailing slash This fixes a regression around directory listing and index.html hosting. Seems that during one of recent refactors code changed and we no longer check for trailing slash in HTTP request path, but look at content path instead. This cleans this up and also ensures dir behavior is the same for both index.html hosting and dir-index-html (generated listing). It also adds more tests so we catch any future regressions. --- core/corehttp/gateway_handler_unixfs_dir.go | 38 +++++++++++---------- core/corehttp/gateway_test.go | 16 ++++----- test/sharness/t0110-gateway.sh | 1 + test/sharness/t0113-gateway-symlink.sh | 2 +- test/sharness/t0114-gateway-subdomains.sh | 4 +-- test/sharness/t0115-gateway-dir-listing.sh | 32 +++++++++++++---- 6 files changed, 57 insertions(+), 36 deletions(-) diff --git a/core/corehttp/gateway_handler_unixfs_dir.go b/core/corehttp/gateway_handler_unixfs_dir.go index 9ba904475..a6ab7cb55 100644 --- a/core/corehttp/gateway_handler_unixfs_dir.go +++ b/core/corehttp/gateway_handler_unixfs_dir.go @@ -41,28 +41,30 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit } originalUrlPath := requestURI.Path + // Ensure directory paths end with '/' + if originalUrlPath[len(originalUrlPath)-1] != '/' { + // don't redirect to trailing slash if it's go get + // https://github.com/ipfs/kubo/pull/3963 + goget := r.URL.Query().Get("go-get") == "1" + if !goget { + suffix := "/" + // preserve query parameters + if r.URL.RawQuery != "" { + suffix = suffix + "?" + r.URL.RawQuery + } + // /ipfs/cid/foo?bar must be redirected to /ipfs/cid/foo/?bar + redirectURL := originalUrlPath + suffix + logger.Debugw("directory location moved permanently", "status", http.StatusMovedPermanently) + http.Redirect(w, r, redirectURL, http.StatusMovedPermanently) + return + } + } + // Check if directory has index.html, if so, serveFile idxPath := ipath.Join(contentPath, "index.html") idx, err := i.api.Unixfs().Get(ctx, idxPath) switch err.(type) { case nil: - cpath := contentPath.String() - dirwithoutslash := cpath[len(cpath)-1] != '/' - goget := r.URL.Query().Get("go-get") == "1" - if dirwithoutslash && !goget { - // See comment above where originalUrlPath is declared. - suffix := "/" - if r.URL.RawQuery != "" { - // preserve query parameters - suffix = suffix + "?" + r.URL.RawQuery - } - - redirectURL := originalUrlPath + suffix - logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath) - http.Redirect(w, r, redirectURL, http.StatusFound) - return - } - f, ok := idx.(files.File) if !ok { internalWebError(w, files.ErrNotReader) @@ -163,7 +165,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit // add the correct link depending on whether the path ends with a slash default: if strings.HasSuffix(backLink, "/") { - backLink += "./.." + backLink += ".." } else { backLink += "/.." } diff --git a/core/corehttp/gateway_test.go b/core/corehttp/gateway_test.go index 5ac27341d..b6b504907 100644 --- a/core/corehttp/gateway_test.go +++ b/core/corehttp/gateway_test.go @@ -380,9 +380,9 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } - // expect 302 redirect to same path, but with trailing slash - if res.StatusCode != 302 { - t.Errorf("status is %d, expected 302", res.StatusCode) + // expect 301 redirect to same path, but with trailing slash + if res.StatusCode != 301 { + t.Errorf("status is %d, expected 301", res.StatusCode) } hdr := res.Header["Location"] if len(hdr) < 1 { @@ -403,9 +403,9 @@ func TestIPNSHostnameRedirect(t *testing.T) { t.Fatal(err) } - // expect 302 redirect to same path, but with prefix and trailing slash - if res.StatusCode != 302 { - t.Errorf("status is %d, expected 302", res.StatusCode) + // expect 301 redirect to same path, but with prefix and trailing slash + if res.StatusCode != 301 { + t.Errorf("status is %d, expected 301", res.StatusCode) } hdr = res.Header["Location"] if len(hdr) < 1 { @@ -492,7 +492,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) { if !matchPathOrBreadcrumbs(s, "/ipns/example.net/foo? #<'") { t.Fatalf("expected a path in directory listing") } - if !strings.Contains(s, "") { + if !strings.Contains(s, "") { t.Fatalf("expected backlink in directory listing") } if !strings.Contains(s, "") { @@ -566,7 +566,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) { if !matchPathOrBreadcrumbs(s, "/ipns/example.net/foo? #<'/bar") { t.Fatalf("expected a path in directory listing") } - if !strings.Contains(s, "") { + if !strings.Contains(s, "") { t.Fatalf("expected backlink in directory listing") } if !strings.Contains(s, "") { diff --git a/test/sharness/t0110-gateway.sh b/test/sharness/t0110-gateway.sh index 630e84648..8cf28dbf4 100755 --- a/test/sharness/t0110-gateway.sh +++ b/test/sharness/t0110-gateway.sh @@ -72,6 +72,7 @@ test_expect_success "GET IPFS directory file output looks good" ' test_expect_success "GET IPFS directory with index.html returns redirect to add trailing slash" " curl -sI -o response_without_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?query=to-remember\" && + test_should_contain \"HTTP/1.1 301 Moved Permanently\" response_without_slash && test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash " diff --git a/test/sharness/t0113-gateway-symlink.sh b/test/sharness/t0113-gateway-symlink.sh index d1f94bde7..9fa7ffa6e 100755 --- a/test/sharness/t0113-gateway-symlink.sh +++ b/test/sharness/t0113-gateway-symlink.sh @@ -22,7 +22,7 @@ test_expect_success "Add the test directory" ' ' test_expect_success "Test the directory listing" ' - curl "$GWAY_ADDR/ipfs/$HASH" > list_response && + curl "$GWAY_ADDR/ipfs/$HASH/" > list_response && test_should_contain ">foo<" list_response && test_should_contain ">bar<" list_response ' diff --git a/test/sharness/t0114-gateway-subdomains.sh b/test/sharness/t0114-gateway-subdomains.sh index f9ab0d824..0dad1e95c 100755 --- a/test/sharness/t0114-gateway-subdomains.sh +++ b/test/sharness/t0114-gateway-subdomains.sh @@ -268,7 +268,7 @@ test_expect_success "valid file and subdirectory paths in directory listing at { test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.localhost/sub/dir" ' curl -s --resolve $DIR_HOSTNAME:127.0.0.1 "http://$DIR_HOSTNAME/ipfs/ipns/" > list_response && - test_should_contain ".." list_response && + test_should_contain ".." list_response && test_should_contain "bar" list_response ' @@ -441,7 +441,7 @@ test_expect_success "valid file and directory paths in directory listing at {cid test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.example.com/sub/dir" ' curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response && - test_should_contain ".." list_response && + test_should_contain ".." list_response && test_should_contain "bar" list_response ' diff --git a/test/sharness/t0115-gateway-dir-listing.sh b/test/sharness/t0115-gateway-dir-listing.sh index bd634388d..bb84203bc 100755 --- a/test/sharness/t0115-gateway-dir-listing.sh +++ b/test/sharness/t0115-gateway-dir-listing.sh @@ -43,8 +43,14 @@ test_expect_success "path gw: backlink on root CID should be hidden" ' test_should_not_contain ".." list_response ' -test_expect_success "path gw: Etag should be present" ' +test_expect_success "path gw: redirect dir listing to URL with trailing slash" ' curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response && + test_should_contain "HTTP/1.1 301 Moved Permanently" list_response && + test_should_contain "Location: /ipfs/${DIR_CID}/%c4%85/%c4%99/" list_response +' + +test_expect_success "path gw: Etag should be present" ' + curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ > list_response && test_should_contain "Index of" list_response && test_should_contain "Etag: \"DirIndex-" list_response ' @@ -72,19 +78,25 @@ test_expect_success "path gw: hash column should be a CID link with filename par DIR_HOSTNAME="${DIR_CID}.ipfs.localhost" # note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1 -test_expect_success "path gw: backlink on root CID should be hidden" ' +test_expect_success "subdomain gw: backlink on root CID should be hidden" ' curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response && test_should_contain "Index of" list_response && test_should_not_contain ".." list_response ' -test_expect_success "path gw: Etag should be present" ' +test_expect_success "subdomain gw: redirect dir listing to URL with trailing slash" ' curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response && + test_should_contain "HTTP/1.1 301 Moved Permanently" list_response && + test_should_contain "Location: /%c4%85/%c4%99/" list_response +' + +test_expect_success "subdomain gw: Etag should be present" ' + curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response && test_should_contain "Index of" list_response && test_should_contain "Etag: \"DirIndex-" list_response ' -test_expect_success "path gw: backlink on subdirectory should point at parent directory" ' +test_expect_success "subdomain gw: backlink on subdirectory should point at parent directory" ' test_should_contain ".." list_response ' @@ -92,11 +104,11 @@ test_expect_success "subdomain gw: breadcrumbs should leverage path-based router test_should_contain "/ipfs/$DIR_CID/ą/ę" list_response ' -test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" ' +test_expect_success "subdomain gw: name column should be a link to content root mounted at subdomain origin" ' test_should_contain "file-źł.txt" list_response ' -test_expect_success "path gw: hash column should be a CID link to path router with filename param" ' +test_expect_success "subdomain gw: hash column should be a CID link to path router with filename param" ' test_should_contain "" list_response ' @@ -121,8 +133,14 @@ test_expect_success "dnslink gw: backlink on root CID should be hidden" ' test_should_not_contain ".." list_response ' -test_expect_success "dnslink gw: Etag should be present" ' +test_expect_success "dnslink gw: redirect dir listing to URL with trailing slash" ' curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response && + test_should_contain "HTTP/1.1 301 Moved Permanently" list_response && + test_should_contain "Location: /%c4%85/%c4%99/" list_response +' + +test_expect_success "dnslink gw: Etag should be present" ' + curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response && test_should_contain "Index of" list_response && test_should_contain "Etag: \"DirIndex-" list_response '