// maxRunes is the maximum number of runes (not bytes) that CleanAndTrim returns.
const maxRunes = 128

// CleanAndTrim sanitizes an untrusted string received from a remote peer so
// that it can be displayed safely in web UIs, terminals, and logs.
//
// Control characters (Cc), format characters (Cf), and surrogates (Cs) are
// replaced with U+FFFD (the Unicode replacement character). Private use
// characters (Co) are preserved. Bytes that are not valid UTF-8 also come out
// as U+FFFD, because ranging over a Go string decodes them that way.
// Replacement happens before trimming, so whitespace control characters such
// as tab and newline become U+FFFD instead of being trimmed away.
//
// Leading whitespace is then dropped, the result is limited to maxRunes runes,
// and trailing whitespace is dropped last. Trimming after the cut guarantees
// the result never ends in whitespace exposed by truncation, which makes the
// function idempotent: CleanAndTrim(CleanAndTrim(s)) == CleanAndTrim(s).
//
// This follows the libp2p identify specification and RFC 9839 guidance:
// replacing problematic code points is preferred over deletion as deletion
// is a known security risk.
func CleanAndTrim(str string) string {
	var b strings.Builder
	// Size hint only: a one-byte control character becomes a three-byte
	// U+FFFD, so the output may be longer in bytes than this.
	sizeHint := len(str)
	if sizeHint > maxRunes {
		sizeHint = maxRunes
	}
	b.Grow(sizeHint)

	kept := 0
	for _, r := range str {
		// Neutralize terminal escapes, CR/LF, bidi overrides, zero-width
		// characters, and surrogates.
		if unicode.In(r, unicode.Cc, unicode.Cf, unicode.Cs) {
			r = unicode.ReplacementChar
		}
		// Skip leading whitespace; nothing has been written yet.
		if kept == 0 && unicode.IsSpace(r) {
			continue
		}
		b.WriteRune(r)
		kept++
		// Stop as soon as the limit is reached so the work done is bounded
		// by maxRunes rather than by the size of the untrusted input.
		if kept == maxRunes {
			break
		}
	}

	return strings.TrimRightFunc(b.String(), unicode.IsSpace)
}
"github.com/ipfs/kubo/core/commands/cmdutils" cmds "github.com/ipfs/go-ipfs-cmds" ke "github.com/ipfs/kubo/core/commands/keyencode" @@ -173,12 +174,14 @@ func printPeer(keyEnc ke.KeyEncoder, ps pstore.Peerstore, p peer.ID) (interface{ slices.Sort(info.Addresses) protocols, _ := ps.GetProtocols(p) // don't care about errors here. - info.Protocols = append(info.Protocols, protocols...) + for _, proto := range protocols { + info.Protocols = append(info.Protocols, protocol.ID(cmdutils.CleanAndTrim(string(proto)))) + } slices.Sort(info.Protocols) if v, err := ps.Get(p, "AgentVersion"); err == nil { if vs, ok := v.(string); ok { - info.AgentVersion = vs + info.AgentVersion = cmdutils.CleanAndTrim(vs) } } diff --git a/core/commands/stat_dht.go b/core/commands/stat_dht.go index e6006e439..b4345f570 100644 --- a/core/commands/stat_dht.go +++ b/core/commands/stat_dht.go @@ -7,6 +7,7 @@ import ( "time" cmdenv "github.com/ipfs/kubo/core/commands/cmdenv" + "github.com/ipfs/kubo/core/commands/cmdutils" cmds "github.com/ipfs/go-ipfs-cmds" dht "github.com/libp2p/go-libp2p-kad-dht" @@ -92,7 +93,9 @@ This interface is not stable and may change from release to release. info := dhtPeerInfo{ID: p.String()} if ver, err := nd.Peerstore.Get(p, "AgentVersion"); err == nil { - info.AgentVersion, _ = ver.(string) + if vs, ok := ver.(string); ok { + info.AgentVersion = cmdutils.CleanAndTrim(vs) + } } else if err == pstore.ErrNotFound { // ignore } else { @@ -143,7 +146,9 @@ This interface is not stable and may change from release to release. 
info := dhtPeerInfo{ID: pi.Id.String()} if ver, err := nd.Peerstore.Get(pi.Id, "AgentVersion"); err == nil { - info.AgentVersion, _ = ver.(string) + if vs, ok := ver.(string); ok { + info.AgentVersion = cmdutils.CleanAndTrim(vs) + } } else if err == pstore.ErrNotFound { // ignore } else { diff --git a/core/commands/swarm.go b/core/commands/swarm.go index 153068438..533ccc078 100644 --- a/core/commands/swarm.go +++ b/core/commands/swarm.go @@ -18,6 +18,7 @@ import ( "github.com/ipfs/kubo/commands" "github.com/ipfs/kubo/config" "github.com/ipfs/kubo/core/commands/cmdenv" + "github.com/ipfs/kubo/core/commands/cmdutils" "github.com/ipfs/kubo/core/node/libp2p" "github.com/ipfs/kubo/repo" "github.com/ipfs/kubo/repo/fsrepo" @@ -27,6 +28,7 @@ import ( inet "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" pstore "github.com/libp2p/go-libp2p/core/peerstore" + "github.com/libp2p/go-libp2p/core/protocol" rcmgr "github.com/libp2p/go-libp2p/p2p/host/resource-manager" ma "github.com/multiformats/go-multiaddr" madns "github.com/multiformats/go-multiaddr-dns" @@ -290,7 +292,7 @@ var swarmPeersCmd = &cmds.Command{ } for _, s := range strs { - ci.Streams = append(ci.Streams, streamInfo{Protocol: string(s)}) + ci.Streams = append(ci.Streams, streamInfo{Protocol: cmdutils.CleanAndTrim(string(s))}) } } @@ -476,13 +478,15 @@ func (ci *connInfo) identifyPeer(ps pstore.Peerstore, p peer.ID) (IdOutput, erro slices.Sort(info.Addresses) if protocols, err := ps.GetProtocols(p); err == nil { - info.Protocols = append(info.Protocols, protocols...) 
+ for _, proto := range protocols { + info.Protocols = append(info.Protocols, protocol.ID(cmdutils.CleanAndTrim(string(proto)))) + } slices.Sort(info.Protocols) } if v, err := ps.Get(p, "AgentVersion"); err == nil { if vs, ok := v.(string); ok { - info.AgentVersion = vs + info.AgentVersion = cmdutils.CleanAndTrim(vs) } } diff --git a/docs/examples/kubo-as-a-library/go.mod b/docs/examples/kubo-as-a-library/go.mod index 78fa7826d..d44a9977e 100644 --- a/docs/examples/kubo-as-a-library/go.mod +++ b/docs/examples/kubo-as-a-library/go.mod @@ -84,6 +84,7 @@ require ( github.com/ipfs/go-ds-pebble v0.5.1 // indirect github.com/ipfs/go-dsqueue v0.0.5 // indirect github.com/ipfs/go-fs-lock v0.1.1 // indirect + github.com/ipfs/go-ipfs-cmds v0.15.0 // indirect github.com/ipfs/go-ipfs-ds-help v1.1.1 // indirect github.com/ipfs/go-ipfs-pq v0.0.3 // indirect github.com/ipfs/go-ipfs-redirects-file v0.1.2 // indirect diff --git a/docs/examples/kubo-as-a-library/go.sum b/docs/examples/kubo-as-a-library/go.sum index bb3e3b153..822bfadfb 100644 --- a/docs/examples/kubo-as-a-library/go.sum +++ b/docs/examples/kubo-as-a-library/go.sum @@ -327,6 +327,8 @@ github.com/ipfs/go-fs-lock v0.1.1 h1:TecsP/Uc7WqYYatasreZQiP9EGRy4ZnKoG4yXxR33nw github.com/ipfs/go-fs-lock v0.1.1/go.mod h1:2goSXMCw7QfscHmSe09oXiR34DQeUdm+ei+dhonqly0= github.com/ipfs/go-ipfs-blockstore v1.3.1 h1:cEI9ci7V0sRNivqaOr0elDsamxXFxJMMMy7PTTDQNsQ= github.com/ipfs/go-ipfs-blockstore v1.3.1/go.mod h1:KgtZyc9fq+P2xJUiCAzbRdhhqJHvsw8u2Dlqy2MyRTE= +github.com/ipfs/go-ipfs-cmds v0.15.0 h1:nQDgKadrzyiFyYoZMARMIoVoSwe3gGTAfGvrWLeAQbQ= +github.com/ipfs/go-ipfs-cmds v0.15.0/go.mod h1:VABf/mv/wqvYX6hLG6Z+40eNAEw3FQO0bSm370Or3Wk= github.com/ipfs/go-ipfs-delay v0.0.0-20181109222059-70721b86a9a8/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-delay v0.0.1/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= diff --git 
a/test/cli/agent_version_unicode_test.go b/test/cli/agent_version_unicode_test.go new file mode 100644 index 000000000..732f13466 --- /dev/null +++ b/test/cli/agent_version_unicode_test.go @@ -0,0 +1,220 @@ +package cli + +import ( + "strings" + "testing" + + "github.com/ipfs/kubo/core/commands/cmdutils" + "github.com/stretchr/testify/assert" +) + +func TestCleanAndTrimUnicode(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "Basic ASCII", + input: "kubo/1.0.0", + expected: "kubo/1.0.0", + }, + { + name: "Polish characters preserved", + input: "test-ąęćłńóśźż", + expected: "test-ąęćłńóśźż", + }, + { + name: "Chinese characters preserved", + input: "版本-中文测试", + expected: "版本-中文测试", + }, + { + name: "Arabic text preserved", + input: "اختبار-العربية", + expected: "اختبار-العربية", + }, + { + name: "Emojis preserved", + input: "version-1.0-🚀-🎉", + expected: "version-1.0-🚀-🎉", + }, + { + name: "Complex Unicode with combining marks preserved", + input: "h̸̢̢̢̢̢̢̢̢̢̢e̵̵̵̵̵̵̵̵̵̵l̷̷̷̷̷̷̷̷̷̷l̶̶̶̶̶̶̶̶̶̶o̴̴̴̴̴̴̴̴̴̴", + expected: "h̸̢̢̢̢̢̢̢̢̢̢e̵̵̵̵̵̵̵̵̵̵l̷̷̷̷̷̷̷̷̷̷l̶̶̶̶̶̶̶̶̶̶o̴̴̴̴̴̴̴̴̴̴", // Preserved as-is (only 50 runes) + }, + { + name: "Long text with combining marks truncated at 128", + input: strings.Repeat("ẽ̸̢̛̖̬͈͉͖͇͈̭̥́̓̌̾͊̊̂̄̍̅̂͌́", 10), // Very long text (260 runes) + expected: "ẽ̸̢̛̖̬͈͉͖͇͈̭̥́̓̌̾͊̊̂̄̍̅̂͌́ẽ̸̢̛̖̬͈͉͖͇͈̭̥́̓̌̾͊̊̂̄̍̅̂͌́ẽ̸̢̛̖̬͈͉͖͇͈̭̥́̓̌̾͊̊̂̄̍̅̂͌́ẽ̸̢̛̖̬͈͉͖͇͈̭̥́̓̌̾͊̊̂̄̍̅̂͌́ẽ̸̢̛̖̬͈͉͖͇͈̭̥́̓̌̾͊̊̂̄̍̅̂", // Truncated at 128 runes + }, + { + name: "Zero-width characters replaced with U+FFFD", + input: "test\u200Bzero\u200Cwidth\u200D\uFEFFchars", + expected: "test�zero�width��chars", + }, + { + name: "RTL/LTR override replaced with U+FFFD", + input: "test\u202Drtl\u202Eltr\u202Aoverride", + expected: "test�rtl�ltr�override", + }, + { + name: "Bidi isolates replaced with U+FFFD", + input: "test\u2066bidi\u2067isolate\u2068text\u2069end", + expected: "test�bidi�isolate�text�end", + }, + { + name: "Control 
characters replaced with U+FFFD", + input: "test\x00null\x1Fescape\x7Fdelete", + expected: "test�null�escape�delete", + }, + { + name: "Combining marks preserved", + input: "e\u0301\u0302\u0303\u0304\u0305", // e with 5 combining marks + expected: "e\u0301\u0302\u0303\u0304\u0305", // All preserved + }, + { + name: "No truncation at 70 characters", + input: "123456789012345678901234567890123456789012345678901234567890123456789", + expected: "123456789012345678901234567890123456789012345678901234567890123456789", + }, + { + name: "No truncation with Unicode - 70 rockets preserved", + input: strings.Repeat("🚀", 70), + expected: strings.Repeat("🚀", 70), + }, + { + name: "Empty string", + input: "", + expected: "", + }, + { + name: "Only whitespace with control chars", + input: " \t\n ", + expected: "\uFFFD\uFFFD", // Tab and newline become U+FFFD, spaces trimmed + }, + { + name: "Leading and trailing whitespace", + input: " test ", + expected: "test", + }, + { + name: "Complex mix - invisible chars replaced with U+FFFD, Unicode preserved", + input: "kubo/1.0-🚀\u200B h̸̢̏̔ḛ̶̽̀s̵t\u202E-ąęł-中文", + expected: "kubo/1.0-🚀� h̸̢̏̔ḛ̶̽̀s̵t�-ąęł-中文", + }, + { + name: "Emoji with skin tone preserved", + input: "👍🏽", // Thumbs up with skin tone modifier + expected: "👍🏽", // Preserved as-is + }, + { + name: "Mixed scripts preserved", + input: "Hello-你好-مرحبا-Здравствуйте", + expected: "Hello-你好-مرحبا-Здравствуйте", + }, + { + name: "Format characters replaced with U+FFFD", + input: "test\u00ADsoft\u2060word\u206Fnom\u200Ebreak", + expected: "test�soft�word�nom�break", // Soft hyphen, word joiner, etc replaced + }, + { + name: "Complex Unicode text with many combining marks (91 runes, no truncation)", + input: "ț̸̢͙̞̖̏̔ȩ̶̰͓̪͎̱̠̥̳͔̽̀̃̿̌̾̀͗̕̕͜s̵̢̛̖̬͈͉͖͇͈̭̥̃́̓̌̾͊̊̂̄̍̅̂͌́ͅţ̴̯̹̪͖͓̘̊́̑̄̋̈́͐̈́̔̇̄̂́̎̓͛͠ͅ test", + expected: "ț̸̢͙̞̖̏̔ȩ̶̰͓̪͎̱̠̥̳͔̽̀̃̿̌̾̀͗̕̕͜s̵̢̛̖̬͈͉͖͇͈̭̥̃́̓̌̾͊̊̂̄̍̅̂͌́ͅţ̴̯̹̪͖͓̘̊́̑̄̋̈́͐̈́̔̇̄̂́̎̓͛͠ͅ test", // Not truncated (91 < 128) + }, + { + name: "Truncation at 128 
characters", + input: strings.Repeat("a", 150), + expected: strings.Repeat("a", 128), + }, + { + name: "Truncation with Unicode at 128", + input: strings.Repeat("🚀", 150), + expected: strings.Repeat("🚀", 128), + }, + { + name: "Private use characters preserved (per spec)", + input: "test\uE000\uF8FF", // Private use area characters + expected: "test\uE000\uF8FF", // Should be preserved + }, + { + name: "U+FFFD replacement for multiple categories", + input: "a\x00b\u200Cc\u202Ed", // control, format chars + expected: "a\uFFFDb\uFFFDc\uFFFDd", // All replaced with U+FFFD + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := cmdutils.CleanAndTrim(tt.input) + assert.Equal(t, tt.expected, result, "CleanAndTrim(%q) = %q, want %q", tt.input, result, tt.expected) + }) + } +} + +func TestCleanAndTrimIdempotent(t *testing.T) { + // Test that applying CleanAndTrim twice gives the same result + inputs := []string{ + "test-ąęćłńóśźż", + "版本-中文测试", + "version-1.0-🚀-🎉", + "h̸e̵l̷l̶o̴ w̸o̵r̷l̶d̴", + "test\u200Bzero\u200Cwidth", + } + + for _, input := range inputs { + once := cmdutils.CleanAndTrim(input) + twice := cmdutils.CleanAndTrim(once) + assert.Equal(t, once, twice, "CleanAndTrim should be idempotent for %q", input) + } +} + +func TestCleanAndTrimSecurity(t *testing.T) { + // Test that all invisible/dangerous characters are removed + tests := []struct { + name string + input string + check func(string) bool + }{ + { + name: "No zero-width spaces", + input: "test\u200B\u200C\u200Dtest", + check: func(s string) bool { + return !strings.Contains(s, "\u200B") && !strings.Contains(s, "\u200C") && !strings.Contains(s, "\u200D") + }, + }, + { + name: "No bidi overrides", + input: "test\u202A\u202B\u202C\u202D\u202Etest", + check: func(s string) bool { + for _, r := range []rune{0x202A, 0x202B, 0x202C, 0x202D, 0x202E} { + if strings.ContainsRune(s, r) { + return false + } + } + return true + }, + }, + { + name: "No control characters", + input: 
"test\x00\x01\x02\x1F\x7Ftest", + check: func(s string) bool { + for _, r := range s { + if r < 0x20 || r == 0x7F { + return false + } + } + return true + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := cmdutils.CleanAndTrim(tt.input) + assert.True(t, tt.check(result), "Security check failed for %q -> %q", tt.input, result) + }) + } +} diff --git a/test/dependencies/go.mod b/test/dependencies/go.mod index e31258307..ce363f5cf 100644 --- a/test/dependencies/go.mod +++ b/test/dependencies/go.mod @@ -141,6 +141,7 @@ require ( github.com/ipfs/go-cid v0.5.0 // indirect github.com/ipfs/go-datastore v0.9.0 // indirect github.com/ipfs/go-dsqueue v0.0.5 // indirect + github.com/ipfs/go-ipfs-cmds v0.15.0 // indirect github.com/ipfs/go-ipfs-redirects-file v0.1.2 // indirect github.com/ipfs/go-ipld-cbor v0.2.1 // indirect github.com/ipfs/go-ipld-format v0.6.3 // indirect diff --git a/test/dependencies/go.sum b/test/dependencies/go.sum index b15c30b03..06c94c5a3 100644 --- a/test/dependencies/go.sum +++ b/test/dependencies/go.sum @@ -350,6 +350,8 @@ github.com/ipfs/go-dsqueue v0.0.5 h1:TUOk15TlCJ/NKV8Yk2W5wgkEjDa44Nem7a7FGIjsMNU github.com/ipfs/go-dsqueue v0.0.5/go.mod h1:i/jAlpZjBbQJLioN+XKbFgnd+u9eAhGZs9IrqIzTd9g= github.com/ipfs/go-ipfs-blockstore v1.3.1 h1:cEI9ci7V0sRNivqaOr0elDsamxXFxJMMMy7PTTDQNsQ= github.com/ipfs/go-ipfs-blockstore v1.3.1/go.mod h1:KgtZyc9fq+P2xJUiCAzbRdhhqJHvsw8u2Dlqy2MyRTE= +github.com/ipfs/go-ipfs-cmds v0.15.0 h1:nQDgKadrzyiFyYoZMARMIoVoSwe3gGTAfGvrWLeAQbQ= +github.com/ipfs/go-ipfs-cmds v0.15.0/go.mod h1:VABf/mv/wqvYX6hLG6Z+40eNAEw3FQO0bSm370Or3Wk= github.com/ipfs/go-ipfs-delay v0.0.1 h1:r/UXYyRcddO6thwOnhiznIAiSvxMECGgtv35Xs1IeRQ= github.com/ipfs/go-ipfs-delay v0.0.1/go.mod h1:8SP1YXK1M1kXuc4KJZINY3TQQ03J2rwBG9QfXmbRPrw= github.com/ipfs/go-ipfs-ds-help v1.1.1 h1:B5UJOH52IbcfS56+Ul+sv8jnIV10lbjLF5eOO0C66Nw= diff --git a/version.go b/version.go index b8b4d38c8..751e790cc 100644 --- a/version.go +++ 
b/version.go @@ -3,6 +3,8 @@ package ipfs import ( "fmt" "runtime" + + "github.com/ipfs/kubo/core/commands/cmdutils" ) // CurrentCommit is the current git commit, this is set as a ldflag in the Makefile. @@ -27,13 +29,13 @@ func GetUserAgentVersion() string { } userAgent += userAgentSuffix } - return userAgent + return cmdutils.CleanAndTrim(userAgent) } var userAgentSuffix string func SetUserAgentSuffix(suffix string) { - userAgentSuffix = suffix + userAgentSuffix = cmdutils.CleanAndTrim(suffix) } type VersionInfo struct {