Merge pull request #9123 from ipfs/fix/gw-trailing-slash-unixfs-dirs

fix(gw): directory URL normalization
This commit is contained in:
Marcin Rataj 2022-07-21 21:34:49 +02:00 committed by GitHub
commit cf82ae54ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 64 additions and 36 deletions

View File

@ -41,28 +41,30 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
}
originalUrlPath := requestURI.Path
// Ensure directory paths end with '/'
if originalUrlPath[len(originalUrlPath)-1] != '/' {
// don't redirect to trailing slash if it's go get
// https://github.com/ipfs/kubo/pull/3963
goget := r.URL.Query().Get("go-get") == "1"
if !goget {
suffix := "/"
// preserve query parameters
if r.URL.RawQuery != "" {
suffix = suffix + "?" + r.URL.RawQuery
}
// /ipfs/cid/foo?bar must be redirected to /ipfs/cid/foo/?bar
redirectURL := originalUrlPath + suffix
logger.Debugw("directory location moved permanently", "status", http.StatusMovedPermanently)
http.Redirect(w, r, redirectURL, http.StatusMovedPermanently)
return
}
}
// Check if directory has index.html, if so, serveFile
idxPath := ipath.Join(contentPath, "index.html")
idx, err := i.api.Unixfs().Get(ctx, idxPath)
switch err.(type) {
case nil:
cpath := contentPath.String()
dirwithoutslash := cpath[len(cpath)-1] != '/'
goget := r.URL.Query().Get("go-get") == "1"
if dirwithoutslash && !goget {
// See comment above where originalUrlPath is declared.
suffix := "/"
if r.URL.RawQuery != "" {
// preserve query parameters
suffix = suffix + "?" + r.URL.RawQuery
}
redirectURL := originalUrlPath + suffix
logger.Debugw("serving index.html file", "to", redirectURL, "status", http.StatusFound, "path", idxPath)
http.Redirect(w, r, redirectURL, http.StatusFound)
return
}
f, ok := idx.(files.File)
if !ok {
internalWebError(w, files.ErrNotReader)
@ -163,7 +165,7 @@ func (i *gatewayHandler) serveDirectory(ctx context.Context, w http.ResponseWrit
// add the correct link depending on whether the path ends with a slash
default:
if strings.HasSuffix(backLink, "/") {
backLink += "./.."
backLink += ".."
} else {
backLink += "/.."
}

View File

@ -380,9 +380,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}
// expect 302 redirect to same path, but with trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr := res.Header["Location"]
if len(hdr) < 1 {
@ -403,9 +403,9 @@ func TestIPNSHostnameRedirect(t *testing.T) {
t.Fatal(err)
}
// expect 302 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 302 {
t.Errorf("status is %d, expected 302", res.StatusCode)
// expect 301 redirect to same path, but with prefix and trailing slash
if res.StatusCode != 301 {
t.Errorf("status is %d, expected 301", res.StatusCode)
}
hdr = res.Header["Location"]
if len(hdr) < 1 {
@ -492,7 +492,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/file.txt\">") {
@ -566,7 +566,7 @@ func TestIPNSHostnameBacklinks(t *testing.T) {
if !matchPathOrBreadcrumbs(s, "/ipns/<a href=\"//example.net/\">example.net</a>/<a href=\"//example.net/foo%3F%20%23%3C%27\">foo? #&lt;&#39;</a>/<a href=\"//example.net/foo%3F%20%23%3C%27/bar\">bar</a>") {
t.Fatalf("expected a path in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/./..\">") {
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/..\">") {
t.Fatalf("expected backlink in directory listing")
}
if !strings.Contains(s, "<a href=\"/foo%3F%20%23%3C%27/bar/file.txt\">") {

View File

@ -72,9 +72,17 @@ test_expect_success "GET IPFS directory file output looks good" '
test_expect_success "GET IPFS directory with index.html returns redirect to add trailing slash" "
curl -sI -o response_without_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?query=to-remember\" &&
test_should_contain \"HTTP/1.1 301 Moved Permanently\" response_without_slash &&
test_should_contain \"Location: /ipfs/$HASH2/dirwithindex/?query=to-remember\" response_without_slash
"
# This enables go get to parse go-import meta tags from index.html files stored in IPFS
# https://github.com/ipfs/kubo/pull/3963
test_expect_success "GET IPFS directory with index.html and no trailing slash returns expected output when go-get is passed" "
curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex?go-get=1\" &&
test_should_contain \"hello i am a webpage\" response_with_slash
"
test_expect_success "GET IPFS directory with index.html and trailing slash returns expected output" "
curl -s -o response_with_slash \"http://127.0.0.1:$port/ipfs/$HASH2/dirwithindex/?query=to-remember\" &&
test_should_contain \"hello i am a webpage\" response_with_slash

View File

@ -22,7 +22,7 @@ test_expect_success "Add the test directory" '
'
test_expect_success "Test the directory listing" '
curl "$GWAY_ADDR/ipfs/$HASH" > list_response &&
curl "$GWAY_ADDR/ipfs/$HASH/" > list_response &&
test_should_contain ">foo<" list_response &&
test_should_contain ">bar<" list_response
'

View File

@ -268,7 +268,7 @@ test_expect_success "valid file and subdirectory paths in directory listing at {
test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.localhost/sub/dir" '
curl -s --resolve $DIR_HOSTNAME:127.0.0.1 "http://$DIR_HOSTNAME/ipfs/ipns/" > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'
@ -441,7 +441,7 @@ test_expect_success "valid file and directory paths in directory listing at {cid
test_expect_success "valid parent directory path in directory listing at {cid}.ipfs.example.com/sub/dir" '
curl -s -H "Host: $DIR_FQDN" http://127.0.0.1:$GWAY_PORT/ipfs/ipns/ > list_response &&
test_should_contain "<a href=\"/ipfs/ipns/./..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/..\">..</a>" list_response &&
test_should_contain "<a href=\"/ipfs/ipns/bar\">bar</a>" list_response
'

View File

@ -43,8 +43,14 @@ test_expect_success "path gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/ipfs/$DIR_CID/\">..</a>" list_response
'
test_expect_success "path gw: Etag should be present" '
test_expect_success "path gw: redirect dir listing to URL with trailing slash" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /ipfs/${DIR_CID}/%c4%85/%c4%99/" list_response
'
test_expect_success "path gw: Etag should be present" '
curl -sD - http://127.0.0.1:$GWAY_PORT/ipfs/${DIR_CID}/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
@ -72,19 +78,25 @@ test_expect_success "path gw: hash column should be a CID link with filename par
DIR_HOSTNAME="${DIR_CID}.ipfs.localhost"
# note: we skip DNS lookup by running curl with --resolve $DIR_HOSTNAME:127.0.0.1
test_expect_success "path gw: backlink on root CID should be hidden" '
test_expect_success "subdomain gw: backlink on root CID should be hidden" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_not_contain "<a href=\"/\">..</a>" list_response
'
test_expect_success "path gw: Etag should be present" '
test_expect_success "subdomain gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'
test_expect_success "subdomain gw: Etag should be present" '
curl -sD - --resolve $DIR_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DIR_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'
test_expect_success "path gw: backlink on subdirectory should point at parent directory" '
test_expect_success "subdomain gw: backlink on subdirectory should point at parent directory" '
test_should_contain "<a href=\"/%C4%85/%C4%99/..\">..</a>" list_response
'
@ -92,11 +104,11 @@ test_expect_success "subdomain gw: breadcrumbs should leverage path-based router
test_should_contain "/ipfs/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID\">$DIR_CID</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85\">ą</a>/<a href=\"//localhost:$GWAY_PORT/ipfs/$DIR_CID/%C4%85/%C4%99\">ę</a>" list_response
'
test_expect_success "path gw: name column should be a link to content root mounted at subdomain origin" '
test_expect_success "subdomain gw: name column should be a link to content root mounted at subdomain origin" '
test_should_contain "<a href=\"/%C4%85/%C4%99/file-%C5%BA%C5%82.txt\">file-źł.txt</a>" list_response
'
test_expect_success "path gw: hash column should be a CID link to path router with filename param" '
test_expect_success "subdomain gw: hash column should be a CID link to path router with filename param" '
test_should_contain "<a class=\"ipfs-hash\" translate=\"no\" href=\"//localhost:$GWAY_PORT/ipfs/$FILE_CID?filename=file-%25C5%25BA%25C5%2582.txt\">" list_response
'
@ -121,8 +133,14 @@ test_expect_success "dnslink gw: backlink on root CID should be hidden" '
test_should_not_contain "<a href=\"/\">..</a>" list_response
'
test_expect_success "dnslink gw: Etag should be present" '
test_expect_success "dnslink gw: redirect dir listing to URL with trailing slash" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę > list_response &&
test_should_contain "HTTP/1.1 301 Moved Permanently" list_response &&
test_should_contain "Location: /%c4%85/%c4%99/" list_response
'
test_expect_success "dnslink gw: Etag should be present" '
curl -sD - --resolve $DNSLINK_HOSTNAME:$GWAY_PORT:127.0.0.1 http://$DNSLINK_HOSTNAME:$GWAY_PORT/ą/ę/ > list_response &&
test_should_contain "Index of" list_response &&
test_should_contain "Etag: \"DirIndex-" list_response
'