diff --git a/p2p/net/conn/dial.go b/p2p/net/conn/dial.go index ee9d9f4ad..fdebbc24b 100644 --- a/p2p/net/conn/dial.go +++ b/p2p/net/conn/dial.go @@ -5,12 +5,14 @@ import ( "math/rand" "net" "strings" + "syscall" context "github.com/jbenet/go-ipfs/Godeps/_workspace/src/code.google.com/p/go.net/context" ma "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-multiaddr" manet "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-multiaddr-net" reuseport "github.com/jbenet/go-ipfs/Godeps/_workspace/src/github.com/jbenet/go-reuseport" + addrutil "github.com/jbenet/go-ipfs/p2p/net/swarm/addr" peer "github.com/jbenet/go-ipfs/p2p/peer" debugerror "github.com/jbenet/go-ipfs/util/debugerror" ) @@ -75,8 +77,7 @@ func (d *Dialer) rawConnDial(ctx context.Context, raddr ma.Multiaddr, remote pee // before doing anything, check we're going to be able to dial. // we may not support the given address. - _, _, err := manet.DialArgs(raddr) - if err != nil { + if _, _, err := manet.DialArgs(raddr); err != nil { return nil, err } @@ -86,45 +87,76 @@ func (d *Dialer) rawConnDial(ctx context.Context, raddr ma.Multiaddr, remote pee // get local addr to use. laddr := pickLocalAddr(d.LocalAddrs, raddr) - log.Debugf("%s dialing %s -- %s --> %s", d.LocalPeer, remote, laddr, raddr) + if laddr != nil { // dial using reuseport.Dialer, because we're probably reusing addrs. // this is optimistic, as the reuseDial may fail to bind the port. - if nconn, err := d.reuseDial(laddr, raddr); err == nil { + if nconn, retry, reuseErr := d.reuseDial(laddr, raddr); reuseErr == nil { // if it worked, wrap the raw net.Conn with our manet.Conn log.Debugf("%s reuse worked! %s %s %s", d.LocalPeer, laddr, nconn.RemoteAddr(), nconn) return manet.WrapNetConn(nconn) + } else if !retry { + // reuseDial is sure this is a legitimate dial failure, not a reuseport failure. 
+ return nil, reuseErr } else { - log.Debugf("%s port reuse failed: %s %s", d.LocalPeer, laddr, err) + // this is a failure to reuse port. log it. + log.Debugf("%s port reuse failed: %s --> %s -- %s", d.LocalPeer, laddr, raddr, reuseErr) } - // if not, we fall back to regular Dial without a local addr specified. } - // no local addr, or failed to reuse. just dial straight with a new port. + // no local addr, or reuseport failed. just dial straight with a new port. return d.Dialer.Dial(raddr) } -func (d *Dialer) reuseDial(laddr, raddr ma.Multiaddr) (net.Conn, error) { +func (d *Dialer) reuseDial(laddr, raddr ma.Multiaddr) (conn net.Conn, retry bool, err error) { + if laddr == nil { + // if we're given no local address no sense in using reuseport to dial, dial out as usual. + return nil, true, reuseport.ErrReuseFailed + } + // give reuse.Dialer the manet.Dialer's Dialer. // (wow, Dialer should've so been an interface...) rd := reuseport.Dialer{d.Dialer.Dialer} // get the local net.Addr manually - var err error rd.D.LocalAddr, err = manet.ToNetAddr(laddr) if err != nil { - return nil, err + return nil, true, err // something wrong with laddr. retry without. } // get the raddr dial args for rd.dial network, netraddr, err := manet.DialArgs(raddr) if err != nil { - return nil, err + return nil, true, err // something wrong with raddr. retry without reuseport. } // rd.Dial gets us a net.Conn with SO_REUSEPORT and SO_REUSEADDR set. - return rd.Dial(network, netraddr) + conn, err = rd.Dial(network, netraddr) + return conn, reuseErrShouldRetry(err), err // retry is false when err is nil. +} + +// reuseErrShouldRetry diagnoses whether to retry after a reuse error. +// if we failed to bind, we should retry. if bind worked and this is a +// real dial error (remote end didn't answer) then we should not retry. +func reuseErrShouldRetry(err error) bool { + if err == nil { + return false // hey, it worked! no need to retry. + } + + errno, ok := err.(syscall.Errno) + if !ok { // not an errno? 
who knows what this is. retry. + return true + } + + switch errno { + case syscall.EADDRINUSE, syscall.EADDRNOTAVAIL: + return true // failure to bind. retry. + case syscall.ECONNREFUSED: + return false // real dial error + default: + return true // optimistically default to retry. + } } func pickLocalAddr(laddrs []ma.Multiaddr, raddr ma.Multiaddr) (laddr ma.Multiaddr) { @@ -132,11 +164,30 @@ func pickLocalAddr(laddrs []ma.Multiaddr, raddr ma.Multiaddr) (laddr ma.Multiadd return nil } + // make sure that we ONLY use local addrs that match the remote addr. laddrs = manet.AddrMatch(raddr, laddrs) if len(laddrs) < 1 { return nil } + // make sure that we ONLY use local addrs that CAN dial the remote addr. + // filter out all the local addrs that aren't capable + raddrIPLayer := ma.Split(raddr)[0] + raddrIsLoopback := manet.IsIPLoopback(raddrIPLayer) + raddrIsLinkLocal := manet.IsIP6LinkLocal(raddrIPLayer) + laddrs = addrutil.FilterAddrs(laddrs, func(a ma.Multiaddr) bool { + laddrIPLayer := ma.Split(a)[0] + laddrIsLoopback := manet.IsIPLoopback(laddrIPLayer) + laddrIsLinkLocal := manet.IsIP6LinkLocal(laddrIPLayer) + if laddrIsLoopback { // our loopback addrs can only dial loopbacks. + return raddrIsLoopback + } + if laddrIsLinkLocal { + return raddrIsLinkLocal // our linklocal addrs can only dial link locals. + } + return true + }) + // TODO pick with a good heuristic // we use a random one for now to prevent bad addresses from making nodes unreachable // with a random selection, multiple tries may work. diff --git a/p2p/net/swarm/swarm_dial.go b/p2p/net/swarm/swarm_dial.go index 4528c2c4d..c6160b476 100644 --- a/p2p/net/swarm/swarm_dial.go +++ b/p2p/net/swarm/swarm_dial.go @@ -251,8 +251,10 @@ func (s *Swarm) dial(ctx context.Context, p peer.ID) (*Conn, error) { log.Warning("Dial not given PrivateKey, so WILL NOT SECURE conn.") } - // get our own addrs - localAddrs := s.peers.Addresses(s.local) + // get our own addrs. 
try dialing out from our listener addresses (reusing ports) + // Note that using our peerstore's addresses here is incorrect, as that would + // include observed addresses. TODO: make peerstore's address book smarter. + localAddrs := s.ListenAddresses() if len(localAddrs) == 0 { log.Debug("Dialing out with no local addresses.") }