fix(gw): ensure dir URLs have trailing slash

This fixes a regression around directory listing and index.html hosting.
Seems that during one of recent refactors code changed and we no longer
check for trailing slash in HTTP request path, but look at content path
instead.

This cleans this up and also ensures dir behavior is the same for
both index.html hosting and dir-index-html (generated listing).

It also adds more tests so we catch any future regressions.
This commit is contained in:
Marcin Rataj 2022-07-19 01:17:37 +02:00
parent c95f5c9fce
commit 3182986151
6 changed files with 57 additions and 36 deletions

View File

@ -41,28 +41,30 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
}
originalUrlPath := requestURI.Path
// Ensure directory paths end with '/'
if originalUrlPath[len(originalUrlPath)-1] != '/' {
// don't redirect to trailing slash if it's go get
// https://github.com/ipfs/kubo/pull/3963
goget := r.URL.Query().Get("go-get") == "1"
if !goget {
suffix := "/"
// preserve query parameters
if r.URL.RawQuery != "" {
suffix = suffix + "?" + r.URL.RawQuery
}
// /ipfs/cid/foo?bar must be redirected to /ipfs/cid/foo/?bar
redirectURL := originalUrlPath + suffix
logger.Debugw("directory location moved permanently", "status", http.StatusMovedPermanently)
http.Redirect(w, r, redirectURL, http.StatusMovedPermanently)
return
}
}
// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(contentPath, "index.html")
idx, err := i.api.Unixfs().Get(ctx, idxPath)
switch err.(type) {
case nil:
cpath := contentPath.String()
dirwithoutslash := cpath[len(cpath)-1] != '/'
goget := r.URL.Query().Get("go-get") == "1"
if dirwithoutslash && !goget {
// See comment above where originalUrlPath is declared.
suffix := "/"
if r.URL.RawQuery != "" {
// preserve query parameters
suffix = suffix + "?" + r.URL.RawQuery
}
redirectURL := originalUrlPath + suffix
logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath)
http.Redirect(w, r, redirectURL, http.StatusFound)
return
}
f, ok := idx.(files.File)
if !ok {
internalWebError(w, files.ErrNotReader)
@ -163,7 +165,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// add the correct link depending on whether the path ends with a slash
default:
if strings.HasSuffix(backLink, "/") {
backLink += "./.."
backLink += ".."
} else {
backLink += "/.."
}

View File

@ -380,9 +380,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}
// expect 302 redirect to same path, but with trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr := res.Header["Location"]
if len(hdr) < 1 {
@ -403,9 +403,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}
// expect 302 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr = res.Header["Location"]
if len(hdr) < 1 {
@ -492,7 +492,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/file.txt\">") {
@ -566,7 +566,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>/<a href=\"//example.net/foo%3F%20%23%3C%27/bar\">bar</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/file.txt\">") {

View File

@ -72,6 +72,7 @@ test_expect_success "GET IPFS directory file output looks good" '
test_expect_success "GET IPFS directory with index.html returns redirect to add trailing slash" "
curl -sI -o response_without_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?query=to-remember\" &&
test_should_contain \"HTTP/1.1 301 Moved Permanently\" response_without_slash &&
test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash
"

View File

@ -22,7 +22,7 @@ test_expect_success "Add the test directory" '
'
test_expect_success "Test the directory listing" '
curl "$GWAY_ADDR/ipfs/$HASH" > list_response &&
curl "$GWAY_ADDR/ipfs/$HASH/" > list_response &&
test_should_contain ">foo<" list_response &&
test_should_contain ">bar<" list_response
'

View File

@ -268,7 +268,7 @@ test_expect_success "valid file and subdirectory paths in directory listing at {
test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.localhost/sub/dir" '
curl -s --resolve $DIR_HOSTNAME:127.0.0.1 "http://$DIR_HOSTNAME/ipfs/ipns/" > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'
@ -441,7 +441,7 @@ test_expect_success "valid file and directory paths in directory listing at {cid
test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.example.com/sub/dir" '
curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

View File

@ -43,8 +43,14 @@ test_expect_success "path gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/ipfs/$DIR_CID/\">..</a>" list_response
'
test_expect_success "path gw: Etag should be present" '
test_expect_success "path gw: redirect dir listing to URL with trailing slash" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /ipfs/${DIR_CID}/%c4%85/%c4%99/" list_response
'
test_expect_success "path gw: Etag should be present" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
@ -72,19 +78,25 @@ test_expect_success "path gw: hash column should be a CID link with filename par
DIR_HOSTNAME="${DIR_CID}.ipfs.localhost"
# note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1
test_expect_success "path gw: backlink on root CID should be hidden" '
test_expect_success "subdomain gw: backlink on root CID should be hidden" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_not_contain "<a href=\"/\">..</a>" list_response
'
test_expect_success "path gw: Etag should be present" '
test_expect_success "subdomain gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'
test_expect_success "subdomain gw: Etag should be present" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
test_expect_success "path gw: backlink on subdirectory should point at parent directory" '
test_expect_success "subdomain gw: backlink on subdirectory should point at parent directory" '
test_should_contain "<a href=\"/%C4%85/%C4%99/..\">..</a>" list_response
'
@ -92,11 +104,11 @@ test_expect_success "subdomain gw: breadcrumbs should leverage path-based router
test_should_contain "/ipfs/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID\">$DIR_CID</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85\">ą</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85/%C4%99\">ę</a>" list_response
'
test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" '
test_expect_success "subdomain gw: name column should be a link to content root mounted at subdomain origin" '
test_should_contain "<a href=\"/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'
test_expect_success "path gw: hash column should be a CID link to path router with filename param" '
test_expect_success "subdomain gw: hash column should be a CID link to path router with filename param" '
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"//localhost:$GWAY_PORT/ipfs/$FILE_CID?filename=file-%25C5%25BA%25C5%2582.txt\">" list_response
'
@ -121,8 +133,14 @@ test_expect_success "dnslink gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/\">..</a>" list_response
'
test_expect_success "dnslink gw: Etag should be present" '
test_expect_success "dnslink gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'
test_expect_success "dnslink gw: Etag should be present" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'