Remove gvisor raw socket hack: icmp.go, icmp_conn.go, wg_stack.go
The raw endpoint approach (ExtractGVisorStack + EnableRawSockets + NewRawEndpoint) was a dead end — gvisor can't forward ICMP through WireGuard regardless of raw socket permissions. spoofPingConn (TCP probe + reply synthesis) is the only working approach and doesn't need any gvisor internals.
This commit is contained in:
2
go.mod
2
go.mod
@@ -4,6 +4,7 @@ go 1.26.1
|
||||
|
||||
require (
|
||||
go4.org/mem v0.0.0-20240501181205-ae6ca9944745
|
||||
golang.org/x/net v0.48.0
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
|
||||
tailscale.com v1.96.5
|
||||
)
|
||||
@@ -22,7 +23,6 @@ require (
|
||||
go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect
|
||||
golang.org/x/crypto v0.46.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
|
||||
golang.org/x/net v0.48.0 // indirect
|
||||
golang.org/x/sync v0.19.0 // indirect
|
||||
golang.org/x/sys v0.40.0 // indirect
|
||||
golang.org/x/time v0.12.0 // indirect
|
||||
|
||||
187
icmp.go
187
icmp.go
@@ -1,187 +0,0 @@
|
||||
package tailnet
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net/netip"
|
||||
"sync"
|
||||
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
"gvisor.dev/gvisor/pkg/waiter"
|
||||
)
|
||||
|
||||
// ICMPForwarder intercepts ICMP echo requests entering the WireGuard
|
||||
// netstack and forwards them as raw IP packets, then reads replies
|
||||
// and writes them back. This is needed because gvisor's ICMP transport
|
||||
// endpoint (used by DialPingAddr) does not reliably forward echo
|
||||
// requests through the WireGuard tunnel on all platforms.
|
||||
//
|
||||
// It registers a transport protocol handler on the gvisor stack,
|
||||
// similar to how Tailscale handles ICMP in their netstack.
|
||||
type ICMPForwarder struct {
|
||||
stack *stack.Stack
|
||||
logf func(format string, args ...any)
|
||||
|
||||
mu sync.Mutex
|
||||
sessions map[icmpSessionKey]*icmpSession
|
||||
}
|
||||
|
||||
// icmpSessionKey is the comparable map key for ICMPForwarder.sessions: a
// source address, a destination address and a 16-bit id.
type icmpSessionKey struct {
	src, dst netip.Addr
	id       uint16
}
|
||||
|
||||
type icmpSession struct {
|
||||
ep tcpip.Endpoint
|
||||
wq waiter.Queue
|
||||
dst tcpip.FullAddress
|
||||
origSrc netip.Addr
|
||||
writePkt func([]byte) // callback to write reply back to TUN
|
||||
}
|
||||
|
||||
// NewICMPForwarder creates an ICMP forwarder on the given gvisor stack.
|
||||
// Call Install() to register the handler.
|
||||
func NewICMPForwarder(s *stack.Stack, logf func(string, ...any)) *ICMPForwarder {
|
||||
return &ICMPForwarder{
|
||||
stack: s,
|
||||
logf: logf,
|
||||
sessions: make(map[icmpSessionKey]*icmpSession),
|
||||
}
|
||||
}
|
||||
|
||||
// Install registers the ICMP handler on the stack. After this, ICMP echo
|
||||
// requests to WireGuard peers will be properly forwarded.
|
||||
func (f *ICMPForwarder) Install() {
|
||||
// Register handler for ICMPv4. When gvisor receives an ICMP packet
|
||||
// on the netstack TUN, it calls our handler instead of the default.
|
||||
f.stack.SetTransportProtocolHandler(header.ICMPv4ProtocolNumber,
|
||||
func(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
|
||||
return f.handleICMPv4(id, pkt)
|
||||
})
|
||||
f.logf("icmp: forwarder installed on gvisor stack")
|
||||
}
|
||||
|
||||
func (f *ICMPForwarder) handleICMPv4(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
|
||||
// Parse ICMP header
|
||||
hdr := header.ICMPv4(pkt.TransportHeader().Slice())
|
||||
if len(hdr) < header.ICMPv4MinimumSize {
|
||||
return false
|
||||
}
|
||||
|
||||
// Only handle echo replies (type 0) coming back from peers.
|
||||
// Echo requests (type 8) going OUT are handled by the normal
|
||||
// gvisor ICMP transport (DialPingAddr). But the reply coming
|
||||
// back needs to be delivered to the PingConn.
|
||||
//
|
||||
// Actually, the issue is that gvisor's ICMP transport handles
|
||||
// echo requests/replies fine in theory. The real problem is
|
||||
// that PingConn.Read never gets the reply. Let's not intercept
|
||||
// here — instead, let's try a different approach.
|
||||
return false // let default handler process it
|
||||
}
|
||||
|
||||
// PingThroughWG sends an ICMP echo request through the WireGuard tunnel
|
||||
// by creating a raw ICMP endpoint on the gvisor stack, writing the echo
|
||||
// request, and reading the reply. This bypasses DialPingAddr which has
|
||||
// issues on some platforms.
|
||||
func (f *ICMPForwarder) PingThroughWG(srcIP, dstIP netip.Addr, icmpPayload []byte) ([]byte, error) {
|
||||
var wq waiter.Queue
|
||||
ep, err := f.stack.NewRawEndpoint(
|
||||
header.ICMPv4ProtocolNumber,
|
||||
header.IPv4ProtocolNumber,
|
||||
&wq,
|
||||
true, // associated
|
||||
)
|
||||
if err != nil {
|
||||
return nil, tcpipErrorToError(err)
|
||||
}
|
||||
defer ep.Close()
|
||||
|
||||
// Bind to source IP
|
||||
bindErr := ep.Bind(tcpip.FullAddress{
|
||||
NIC: 1,
|
||||
Addr: addrToTCPIP(srcIP),
|
||||
})
|
||||
if bindErr != nil {
|
||||
return nil, tcpipErrorToError(bindErr)
|
||||
}
|
||||
|
||||
// Connect to destination
|
||||
connErr := ep.Connect(tcpip.FullAddress{
|
||||
NIC: 1,
|
||||
Addr: addrToTCPIP(dstIP),
|
||||
})
|
||||
if connErr != nil {
|
||||
return nil, tcpipErrorToError(connErr)
|
||||
}
|
||||
|
||||
// Enable header-included mode so we send/receive raw IP+ICMP
|
||||
ep.SocketOptions().SetHeaderIncluded(true)
|
||||
|
||||
// Build raw IP+ICMP packet
|
||||
ipHdrLen := header.IPv4MinimumSize
|
||||
totalLen := ipHdrLen + len(icmpPayload)
|
||||
pkt := make([]byte, totalLen)
|
||||
|
||||
// IP header
|
||||
ip := header.IPv4(pkt)
|
||||
ip.Encode(&header.IPv4Fields{
|
||||
TotalLength: uint16(totalLen),
|
||||
TTL: 64,
|
||||
Protocol: uint8(header.ICMPv4ProtocolNumber),
|
||||
SrcAddr: addrToTCPIP(srcIP),
|
||||
DstAddr: addrToTCPIP(dstIP),
|
||||
})
|
||||
ip.SetChecksum(^ip.CalculateChecksum())
|
||||
|
||||
// ICMP payload (already has type/code/checksum/id/seq from caller)
|
||||
copy(pkt[ipHdrLen:], icmpPayload)
|
||||
|
||||
// Write — Payloader needs io.Reader + Len()
|
||||
payload := bytes.NewReader(pkt)
|
||||
var wOpts tcpip.WriteOptions
|
||||
_, writeErr := ep.Write(payload, wOpts)
|
||||
if writeErr != nil {
|
||||
return nil, tcpipErrorToError(writeErr)
|
||||
}
|
||||
|
||||
// Read reply
|
||||
we, ch := waiter.NewChannelEntry(waiter.ReadableEvents)
|
||||
wq.EventRegister(&we)
|
||||
defer wq.EventUnregister(&we)
|
||||
|
||||
// Wait for data
|
||||
<-ch
|
||||
|
||||
var result tcpip.ReadResult
|
||||
replyBuf := make([]byte, 1500)
|
||||
w := tcpip.SliceWriter(replyBuf)
|
||||
result, readErr := ep.Read(&w, tcpip.ReadOptions{})
|
||||
if readErr != nil {
|
||||
return nil, tcpipErrorToError(readErr)
|
||||
}
|
||||
|
||||
return replyBuf[:result.Count], nil
|
||||
}
|
||||
|
||||
func addrToTCPIP(addr netip.Addr) tcpip.Address {
|
||||
b := addr.As4()
|
||||
return tcpip.AddrFrom4(b)
|
||||
}
|
||||
|
||||
func tcpipErrorToError(err tcpip.Error) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
return &tcpipErr{err}
|
||||
}
|
||||
|
||||
type tcpipErr struct {
|
||||
err tcpip.Error
|
||||
}
|
||||
|
||||
func (e *tcpipErr) Error() string {
|
||||
return e.err.String()
|
||||
}
|
||||
140
icmp_conn.go
140
icmp_conn.go
@@ -1,140 +0,0 @@
|
||||
package tailnet
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"net"
|
||||
"net/netip"
|
||||
"time"
|
||||
|
||||
"gvisor.dev/gvisor/pkg/tcpip"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/header"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
"gvisor.dev/gvisor/pkg/waiter"
|
||||
)
|
||||
|
||||
// rawPingConn implements net.Conn for ICMP echo over a gvisor raw endpoint.
|
||||
// It sends/receives raw ICMP payloads (type+code+checksum+id+seq+data)
|
||||
// through the WireGuard gvisor stack using a HeaderIncluded raw socket.
|
||||
type rawPingConn struct {
|
||||
ep tcpip.Endpoint
|
||||
wq waiter.Queue
|
||||
src netip.Addr
|
||||
dst netip.Addr
|
||||
logf func(string, ...any)
|
||||
closed bool
|
||||
}
|
||||
|
||||
// pingAddr is the net.Addr implementation returned by the ping connections'
// LocalAddr and RemoteAddr methods. It wraps a single netip.Addr.
type pingAddr struct{ addr netip.Addr }

// Network reports the address family of the ping connection: "ping6" for a
// genuine IPv6 address and "ping4" for everything else (IPv4, IPv4-mapped
// IPv6, and the zero Addr, which keeps the historical return value).
func (a pingAddr) Network() string {
	if a.addr.Is6() && !a.addr.Is4In6() {
		return "ping6"
	}
	return "ping4"
}

// String returns the textual form of the wrapped address, as produced by
// netip.Addr.String ("invalid IP" for the zero Addr).
func (a pingAddr) String() string { return a.addr.String() }
|
||||
|
||||
func newRawPingConn(fwd *ICMPForwarder, src, dst netip.Addr, logf func(string, ...any)) (*rawPingConn, error) {
|
||||
var wq waiter.Queue
|
||||
ep, err := fwd.stack.NewRawEndpoint(
|
||||
header.ICMPv4ProtocolNumber,
|
||||
header.IPv4ProtocolNumber,
|
||||
&wq,
|
||||
true, // associated
|
||||
)
|
||||
if err != nil {
|
||||
return nil, tcpipErrorToError(err)
|
||||
}
|
||||
|
||||
if bindErr := ep.Bind(tcpip.FullAddress{NIC: 1, Addr: addrToTCPIP(src)}); bindErr != nil {
|
||||
ep.Close()
|
||||
return nil, tcpipErrorToError(bindErr)
|
||||
}
|
||||
if connErr := ep.Connect(tcpip.FullAddress{NIC: 1, Addr: addrToTCPIP(dst)}); connErr != nil {
|
||||
ep.Close()
|
||||
return nil, tcpipErrorToError(connErr)
|
||||
}
|
||||
|
||||
ep.SocketOptions().SetHeaderIncluded(true)
|
||||
|
||||
return &rawPingConn{ep: ep, wq: wq, src: src, dst: dst, logf: logf}, nil
|
||||
}
|
||||
|
||||
// Write sends an ICMP echo request. The caller provides raw ICMP payload
|
||||
// (type+code+checksum+id+seq+data). We wrap it in an IP header.
|
||||
func (c *rawPingConn) Write(icmpPayload []byte) (int, error) {
|
||||
ipHdrLen := header.IPv4MinimumSize
|
||||
totalLen := ipHdrLen + len(icmpPayload)
|
||||
pkt := make([]byte, totalLen)
|
||||
|
||||
ip := header.IPv4(pkt)
|
||||
ip.Encode(&header.IPv4Fields{
|
||||
TotalLength: uint16(totalLen),
|
||||
TTL: 64,
|
||||
Protocol: uint8(header.ICMPv4ProtocolNumber),
|
||||
SrcAddr: addrToTCPIP(c.src),
|
||||
DstAddr: addrToTCPIP(c.dst),
|
||||
})
|
||||
ip.SetChecksum(^ip.CalculateChecksum())
|
||||
copy(pkt[ipHdrLen:], icmpPayload)
|
||||
|
||||
payload := bytes.NewReader(pkt)
|
||||
n, writeErr := c.ep.Write(payload, tcpip.WriteOptions{})
|
||||
if writeErr != nil {
|
||||
return 0, tcpipErrorToError(writeErr)
|
||||
}
|
||||
c.logf("icmp: sent %d bytes (IP+ICMP) to %s", n, c.dst)
|
||||
return len(icmpPayload), nil
|
||||
}
|
||||
|
||||
// Read reads an ICMP echo reply. Returns raw ICMP payload (no IP header).
|
||||
func (c *rawPingConn) Read(b []byte) (int, error) {
|
||||
we, ch := waiter.NewChannelEntry(waiter.ReadableEvents)
|
||||
c.wq.EventRegister(&we)
|
||||
defer c.wq.EventUnregister(&we)
|
||||
|
||||
<-ch
|
||||
|
||||
readBuf := make([]byte, 1500)
|
||||
w := tcpip.SliceWriter(readBuf)
|
||||
result, readErr := c.ep.Read(&w, tcpip.ReadOptions{})
|
||||
if readErr != nil {
|
||||
return 0, tcpipErrorToError(readErr)
|
||||
}
|
||||
|
||||
raw := readBuf[:result.Count]
|
||||
// raw is IP+ICMP. Strip IP header to return just ICMP.
|
||||
if len(raw) < header.IPv4MinimumSize {
|
||||
return 0, &net.OpError{Op: "read", Err: net.ErrClosed}
|
||||
}
|
||||
ipHdrLen := int(header.IPv4(raw).HeaderLength())
|
||||
if len(raw) < ipHdrLen {
|
||||
return 0, &net.OpError{Op: "read", Err: net.ErrClosed}
|
||||
}
|
||||
icmpData := raw[ipHdrLen:]
|
||||
n := copy(b, icmpData)
|
||||
c.logf("icmp: recv %d bytes ICMP reply from %s", n, c.dst)
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (c *rawPingConn) Close() error {
|
||||
if c.closed {
|
||||
return nil
|
||||
}
|
||||
c.closed = true
|
||||
c.ep.Close()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *rawPingConn) LocalAddr() net.Addr { return pingAddr{c.src} }
|
||||
func (c *rawPingConn) RemoteAddr() net.Addr { return pingAddr{c.dst} }
|
||||
func (c *rawPingConn) SetDeadline(t time.Time) error { return nil }
|
||||
func (c *rawPingConn) SetReadDeadline(t time.Time) error { return nil }
|
||||
func (c *rawPingConn) SetWriteDeadline(t time.Time) error { return nil }
|
||||
|
||||
// Ensure rawPingConn implements net.Conn
|
||||
var _ net.Conn = (*rawPingConn)(nil)
|
||||
|
||||
// NewRawEndpoint is an alias for creating raw ICMP endpoints from the stack.
|
||||
// Exported so the mini-sing ICMP proxy can also use it directly.
|
||||
func (f *ICMPForwarder) NewRawEndpoint(src, dst netip.Addr) (*rawPingConn, error) {
|
||||
return newRawPingConn(f, src, dst, f.logf)
|
||||
}
|
||||
|
||||
// Stack returns the underlying gvisor stack (for advanced use).
|
||||
func (f *ICMPForwarder) Stack() *stack.Stack { return f.stack }
|
||||
11
outbound.go
11
outbound.go
@@ -30,7 +30,6 @@ type Outbound struct {
|
||||
wgDevice *device.Device
|
||||
tnet *netstack.Net
|
||||
funnel *FunnelServer
|
||||
icmpFwd *ICMPForwarder
|
||||
|
||||
selfAddrs []netip.Prefix
|
||||
peers map[key.NodePublic]*tailcfg.Node
|
||||
@@ -304,16 +303,6 @@ func (o *Outbound) createWireGuard(ctx context.Context) error {
|
||||
}
|
||||
o.logf("outbound: WireGuard device up")
|
||||
|
||||
// Setup ICMP forwarder on the gvisor stack for proper ping support.
|
||||
// EnableRawSockets is needed because CreateNetTUN doesn't set RawFactory.
|
||||
if o.tnet != nil {
|
||||
if gs := ExtractGVisorStack(o.tnet); gs != nil {
|
||||
EnableRawSockets(gs)
|
||||
o.icmpFwd = NewICMPForwarder(gs, o.logf)
|
||||
o.logf("outbound: ICMP forwarder ready (raw sockets enabled)")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -111,6 +111,11 @@ func searchString(s, substr string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// pingAddr is the net.Addr implementation returned by the ping connections'
// LocalAddr and RemoteAddr methods. It wraps a single netip.Addr.
type pingAddr struct{ addr netip.Addr }

// Network reports the address family of the ping connection: "ping6" for a
// genuine IPv6 address and "ping4" for everything else (IPv4, IPv4-mapped
// IPv6, and the zero Addr, which keeps the historical return value).
func (a pingAddr) Network() string {
	if a.addr.Is6() && !a.addr.Is4In6() {
		return "ping6"
	}
	return "ping4"
}

// String returns the textual form of the wrapped address, as produced by
// netip.Addr.String ("invalid IP" for the zero Addr).
func (a pingAddr) String() string { return a.addr.String() }
|
||||
|
||||
func (c *spoofPingConn) Close() error { c.closed = true; return nil }
|
||||
func (c *spoofPingConn) LocalAddr() net.Addr { return pingAddr{netip.Addr{}} }
|
||||
func (c *spoofPingConn) RemoteAddr() net.Addr { return pingAddr{c.dst} }
|
||||
|
||||
36
wg_stack.go
36
wg_stack.go
@@ -1,36 +0,0 @@
|
||||
package tailnet
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
|
||||
"golang.zx2c4.com/wireguard/tun/netstack"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/transport/raw"
|
||||
)
|
||||
|
||||
// ExtractGVisorStack extracts the *stack.Stack from a wireguard-go
|
||||
// netstack.Net using unsafe reflection. This is needed because Net
|
||||
// does not export its stack field.
|
||||
func ExtractGVisorStack(tnet *netstack.Net) *stack.Stack {
|
||||
v := reflect.ValueOf(tnet).Elem()
|
||||
f := v.FieldByName("stack")
|
||||
if !f.IsValid() {
|
||||
return nil
|
||||
}
|
||||
return (*stack.Stack)(unsafe.Pointer(f.Pointer()))
|
||||
}
|
||||
|
||||
// EnableRawSockets enables raw socket creation on a gvisor stack by
|
||||
// setting the rawFactory field using unsafe. CreateNetTUN doesn't set
|
||||
// RawFactory, so NewRawEndpoint returns "operation not permitted".
|
||||
func EnableRawSockets(s *stack.Stack) {
|
||||
v := reflect.ValueOf(s).Elem()
|
||||
f := v.FieldByName("rawFactory")
|
||||
if !f.IsValid() {
|
||||
return
|
||||
}
|
||||
// Set the rawFactory to raw.EndpointFactory{} which allows raw endpoints
|
||||
ptr := unsafe.Pointer(f.UnsafeAddr())
|
||||
*(*stack.RawFactory)(ptr) = &raw.EndpointFactory{}
|
||||
}
|
||||
Reference in New Issue
Block a user