Remove gvisor raw socket hack: icmp.go, icmp_conn.go, wg_stack.go

The raw endpoint approach (ExtractGVisorStack + EnableRawSockets +
NewRawEndpoint) was a dead end — gvisor can't forward ICMP through
WireGuard regardless of raw socket permissions.

spoofPingConn (TCP probe + reply synthesis) is the only working
approach and doesn't need any gvisor internals.
This commit is contained in:
NeoMody
2026-04-03 09:20:28 +08:00
parent 17135c822a
commit 26052b8c3b
6 changed files with 6 additions and 375 deletions

2
go.mod
View File

@@ -4,6 +4,7 @@ go 1.26.1
require (
go4.org/mem v0.0.0-20240501181205-ae6ca9944745
golang.org/x/net v0.48.0
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
tailscale.com v1.96.5
)
@@ -22,7 +23,6 @@ require (
go4.org/netipx v0.0.0-20231129151722-fdeea329fbba // indirect
golang.org/x/crypto v0.46.0 // indirect
golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect
golang.org/x/net v0.48.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.40.0 // indirect
golang.org/x/time v0.12.0 // indirect

187
icmp.go
View File

@@ -1,187 +0,0 @@
package tailnet
import (
"bytes"
"net/netip"
"sync"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/waiter"
)
// ICMPForwarder bundles a gvisor network stack with a logger and offers
// helpers for sending ICMPv4 echo traffic through that stack.
//
// Install registers an ICMPv4 transport protocol handler on the stack. In
// the code visible here that handler always returns false, so no packet is
// actually intercepted. PingThroughWG creates a raw ICMPv4 endpoint on the
// stack, writes one packet and reads one packet back.
type ICMPForwarder struct {
// stack is the gvisor stack on which handlers and raw endpoints are created.
stack *stack.Stack
// logf is a printf-style logging callback.
logf func(format string, args ...any)
// NOTE(review): mu is not locked by any method visible in this file;
// presumably intended to guard sessions — confirm or remove.
mu sync.Mutex
// sessions is initialised to an empty map by NewICMPForwarder. No visible
// code reads or writes entries.
sessions map[icmpSessionKey]*icmpSession
}
// icmpSessionKey is the key type of ICMPForwarder.sessions.
// NOTE(review): presumably src/dst are the echo request's endpoints and id is
// the ICMP echo identifier — no visible code populates it, so confirm.
type icmpSessionKey struct {
src netip.Addr
dst netip.Addr
id uint16
}
// icmpSession is the value type of ICMPForwarder.sessions. No code visible
// in this file constructs or reads one, so the field notes below are
// inferred from names and types only.
type icmpSession struct {
// ep is a gvisor endpoint; presumably the raw ICMP endpoint of the session.
ep tcpip.Endpoint
// wq is a waiter queue; presumably the one ep notifies — TODO confirm.
wq waiter.Queue
dst tcpip.FullAddress
origSrc netip.Addr
writePkt func([]byte) // callback to write reply back to TUN
}
// NewICMPForwarder returns a forwarder bound to the gvisor stack s that logs
// through logf. Its session table starts out empty. The returned value does
// nothing until Install (or one of the ping helpers) is called.
func NewICMPForwarder(s *stack.Stack, logf func(string, ...any)) *ICMPForwarder {
	fwd := new(ICMPForwarder)
	fwd.stack = s
	fwd.logf = logf
	fwd.sessions = map[icmpSessionKey]*icmpSession{}
	return fwd
}
// Install registers handleICMPv4 as the stack's transport protocol handler
// for ICMPv4 and logs that it did so.
func (f *ICMPForwarder) Install() {
	// The method value has exactly the handler signature the stack expects,
	// so no wrapping closure is needed.
	f.stack.SetTransportProtocolHandler(header.ICMPv4ProtocolNumber, f.handleICMPv4)
	f.logf("icmp: forwarder installed on gvisor stack")
}
// handleICMPv4 is the ICMPv4 transport protocol handler registered by
// Install. It currently never claims a packet: it returns false both for
// packets whose transport header is shorter than a minimal ICMPv4 header and
// for every well-formed packet, leaving processing to the stack's default
// handling.
func (f *ICMPForwarder) handleICMPv4(id stack.TransportEndpointID, pkt *stack.PacketBuffer) bool {
	transport := pkt.TransportHeader().Slice()
	if icmp := header.ICMPv4(transport); len(icmp) < header.ICMPv4MinimumSize {
		// Too short to be an ICMPv4 message; not ours to handle.
		return false
	}

	// Interception was deliberately abandoned here: echo requests and
	// replies are left to gvisor's own ICMP transport.
	return false
}
// PingThroughWG sends one ICMPv4 message through the gvisor stack using a
// raw, header-included endpoint and returns the first packet the endpoint
// delivers back.
//
// srcIP and dstIP are used both to bind/connect the endpoint (on NIC 1) and
// to fill the IPv4 header that is prepended to icmpPayload. icmpPayload must
// already be a complete ICMP message (type, code, checksum, id, seq, data).
//
// The returned slice holds at most 1500 bytes of whatever the endpoint read.
// NOTE(review): presumably this is IP header + ICMP, since the endpoint is in
// header-included mode — confirm before parsing.
//
// The call has no timeout: it blocks until the endpoint becomes readable or
// a read fails. A payload that would make the IPv4 total length exceed 65535
// is rejected instead of being silently truncated.
func (f *ICMPForwarder) PingThroughWG(srcIP, dstIP netip.Addr, icmpPayload []byte) ([]byte, error) {
	// The IPv4 total-length field is 16 bits wide.
	const maxIPv4TotalLen = 1<<16 - 1

	ipHdrLen := header.IPv4MinimumSize
	totalLen := ipHdrLen + len(icmpPayload)
	if totalLen > maxIPv4TotalLen {
		return nil, tcpipErrorToError(&tcpip.ErrMessageTooLong{})
	}

	var wq waiter.Queue
	ep, err := f.stack.NewRawEndpoint(
		header.ICMPv4ProtocolNumber,
		header.IPv4ProtocolNumber,
		&wq,
		true, // associated
	)
	if err != nil {
		return nil, tcpipErrorToError(err)
	}
	defer ep.Close()

	// Register for readability BEFORE sending, so a reply that arrives
	// between the write and the wait cannot be missed.
	we, ch := waiter.NewChannelEntry(waiter.ReadableEvents)
	wq.EventRegister(&we)
	defer wq.EventUnregister(&we)

	// Bind to the source IP.
	if bindErr := ep.Bind(tcpip.FullAddress{NIC: 1, Addr: addrToTCPIP(srcIP)}); bindErr != nil {
		return nil, tcpipErrorToError(bindErr)
	}

	// Connect to the destination.
	if connErr := ep.Connect(tcpip.FullAddress{NIC: 1, Addr: addrToTCPIP(dstIP)}); connErr != nil {
		return nil, tcpipErrorToError(connErr)
	}

	// Header-included mode: we supply the IP header ourselves.
	ep.SocketOptions().SetHeaderIncluded(true)

	// Build the raw IP+ICMP packet.
	pkt := make([]byte, totalLen)
	ip := header.IPv4(pkt)
	ip.Encode(&header.IPv4Fields{
		TotalLength: uint16(totalLen),
		TTL:         64,
		Protocol:    uint8(header.ICMPv4ProtocolNumber),
		SrcAddr:     addrToTCPIP(srcIP),
		DstAddr:     addrToTCPIP(dstIP),
	})
	ip.SetChecksum(^ip.CalculateChecksum())
	copy(pkt[ipHdrLen:], icmpPayload)

	// The endpoint's Write takes a Payloader (io.Reader + Len).
	if _, writeErr := ep.Write(bytes.NewReader(pkt), tcpip.WriteOptions{}); writeErr != nil {
		return nil, tcpipErrorToError(writeErr)
	}

	// Try to read first; only wait on the notification channel when the
	// endpoint reports that nothing is queued yet.
	replyBuf := make([]byte, 1500)
	for {
		w := tcpip.SliceWriter(replyBuf)
		result, readErr := ep.Read(&w, tcpip.ReadOptions{})
		if readErr == nil {
			return replyBuf[:result.Count], nil
		}
		if _, wouldBlock := readErr.(*tcpip.ErrWouldBlock); !wouldBlock {
			return nil, tcpipErrorToError(readErr)
		}
		<-ch
	}
}
// addrToTCPIP converts a netip.Addr into gvisor's tcpip.Address.
//
// IPv4 and IPv4-mapped IPv6 addresses become 4-byte addresses (as before).
// Native IPv6 addresses become 16-byte addresses, and the zero netip.Addr
// becomes the zero tcpip.Address. Previously the latter two cases panicked
// inside netip.Addr.As4.
func addrToTCPIP(addr netip.Addr) tcpip.Address {
	addr = addr.Unmap()
	switch {
	case addr.Is4():
		return tcpip.AddrFrom4(addr.As4())
	case addr.Is6():
		return tcpip.AddrFrom16(addr.As16())
	default:
		// Zero/invalid address: hand back an empty address and let the
		// endpoint's Bind/Connect report the problem as an error.
		return tcpip.Address{}
	}
}
// tcpipErrorToError adapts a gvisor tcpip.Error to the standard error
// interface. A nil input yields a literal nil error, so callers can compare
// the result against nil safely.
func tcpipErrorToError(err tcpip.Error) error {
	if err != nil {
		return &tcpipErr{err}
	}
	return nil
}
// tcpipErr wraps a gvisor tcpip.Error so it satisfies the error interface.
type tcpipErr struct {
	err tcpip.Error
}

// Error returns the wrapped tcpip.Error's string form.
func (e *tcpipErr) Error() string {
	msg := e.err.String()
	return msg
}

View File

@@ -1,140 +0,0 @@
package tailnet
import (
"bytes"
"net"
"net/netip"
"time"
"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/waiter"
)
// rawPingConn implements net.Conn for ICMP echo over a gvisor raw endpoint.
// Write takes a raw ICMP message (type+code+checksum+id+seq+data) and
// prepends an IPv4 header; Read strips the IPv4 header and returns the ICMP
// bytes. The endpoint is put into header-included mode by newRawPingConn.
// The Set*Deadline methods are no-ops, so Read can block indefinitely.
type rawPingConn struct {
// ep is the raw ICMPv4 endpoint all I/O goes through.
ep tcpip.Endpoint
// wq is the waiter queue Read registers on to wait for readable events.
wq waiter.Queue
// src and dst fill the IPv4 header built by Write and back
// LocalAddr/RemoteAddr.
src netip.Addr
dst netip.Addr
// logf is a printf-style logging callback used by Read and Write.
logf func(string, ...any)
// closed is set by Close so that a second Close does not close ep again.
closed bool
}
// pingAddr is a net.Addr for ICMP ping endpoints, backed by a netip.Addr.
type pingAddr struct {
	addr netip.Addr
}

// Network always reports "ping4".
func (p pingAddr) Network() string {
	return "ping4"
}

// String returns the textual form of the wrapped address.
func (p pingAddr) String() string {
	return p.addr.String()
}
// newRawPingConn creates a raw ICMPv4 endpoint on fwd's gvisor stack, binds
// it to src and connects it to dst (both on NIC 1), switches it to
// header-included mode and wraps it in a rawPingConn that logs through logf.
//
// The endpoint is closed again if Bind or Connect fails.
func newRawPingConn(fwd *ICMPForwarder, src, dst netip.Addr, logf func(string, ...any)) (*rawPingConn, error) {
	// Allocate the conn first and hand the endpoint a pointer to the conn's
	// OWN waiter queue. Passing a local queue and then copying it into the
	// struct would leave Read waiting on a copy the endpoint never notifies.
	conn := &rawPingConn{src: src, dst: dst, logf: logf}

	ep, err := fwd.stack.NewRawEndpoint(
		header.ICMPv4ProtocolNumber,
		header.IPv4ProtocolNumber,
		&conn.wq,
		true, // associated
	)
	if err != nil {
		return nil, tcpipErrorToError(err)
	}

	if bindErr := ep.Bind(tcpip.FullAddress{NIC: 1, Addr: addrToTCPIP(src)}); bindErr != nil {
		ep.Close()
		return nil, tcpipErrorToError(bindErr)
	}
	if connErr := ep.Connect(tcpip.FullAddress{NIC: 1, Addr: addrToTCPIP(dst)}); connErr != nil {
		ep.Close()
		return nil, tcpipErrorToError(connErr)
	}

	// We build the IPv4 header ourselves in Write.
	ep.SocketOptions().SetHeaderIncluded(true)

	conn.ep = ep
	return conn, nil
}
// Write sends one ICMP message. icmpPayload is the raw ICMP message
// (type+code+checksum+id+seq+data); Write prepends a 20-byte IPv4 header
// (TTL 64, src/dst taken from the conn) and writes the result to the raw
// endpoint.
//
// On success it returns len(icmpPayload), i.e. the IPv4 header is not
// counted. A payload that would push the IPv4 total length past 65535 is
// rejected with an error rather than being sent with a truncated length
// field.
func (c *rawPingConn) Write(icmpPayload []byte) (int, error) {
	// The IPv4 total-length field is 16 bits wide.
	const maxIPv4TotalLen = 1<<16 - 1

	ipHdrLen := header.IPv4MinimumSize
	totalLen := ipHdrLen + len(icmpPayload)
	if totalLen > maxIPv4TotalLen {
		return 0, tcpipErrorToError(&tcpip.ErrMessageTooLong{})
	}

	pkt := make([]byte, totalLen)
	ip := header.IPv4(pkt)
	ip.Encode(&header.IPv4Fields{
		TotalLength: uint16(totalLen),
		TTL:         64,
		Protocol:    uint8(header.ICMPv4ProtocolNumber),
		SrcAddr:     addrToTCPIP(c.src),
		DstAddr:     addrToTCPIP(c.dst),
	})
	ip.SetChecksum(^ip.CalculateChecksum())
	copy(pkt[ipHdrLen:], icmpPayload)

	n, writeErr := c.ep.Write(bytes.NewReader(pkt), tcpip.WriteOptions{})
	if writeErr != nil {
		return 0, tcpipErrorToError(writeErr)
	}
	c.logf("icmp: sent %d bytes (IP+ICMP) to %s", n, c.dst)
	return len(icmpPayload), nil
}
// Read reads one packet from the raw endpoint, strips its IPv4 header and
// copies the remaining ICMP bytes into b, returning the number of bytes
// copied. If b is smaller than the ICMP message, the excess is dropped.
//
// Read blocks until a packet is available or the endpoint reports an error
// other than "would block"; deadlines are not supported. A packet that is
// too short to contain the IPv4 header it advertises yields a "read"
// *net.OpError wrapping net.ErrClosed (kept for compatibility with existing
// callers).
func (c *rawPingConn) Read(b []byte) (int, error) {
	we, ch := waiter.NewChannelEntry(waiter.ReadableEvents)
	c.wq.EventRegister(&we)
	defer c.wq.EventUnregister(&we)

	// Attempt the read first and only wait when the endpoint has nothing
	// queued. Waiting unconditionally would hang if the packet had already
	// arrived before the waiter was registered.
	readBuf := make([]byte, 1500)
	var raw []byte
	for {
		w := tcpip.SliceWriter(readBuf)
		result, readErr := c.ep.Read(&w, tcpip.ReadOptions{})
		if readErr == nil {
			raw = readBuf[:result.Count]
			break
		}
		if _, wouldBlock := readErr.(*tcpip.ErrWouldBlock); !wouldBlock {
			return 0, tcpipErrorToError(readErr)
		}
		<-ch
	}

	// raw is IP+ICMP. Strip the IP header to return just the ICMP message.
	if len(raw) < header.IPv4MinimumSize {
		return 0, &net.OpError{Op: "read", Err: net.ErrClosed}
	}
	ipHdrLen := int(header.IPv4(raw).HeaderLength())
	// An advertised header length below the 20-byte minimum is malformed;
	// accepting it would leak IP header bytes into the ICMP data.
	if ipHdrLen < header.IPv4MinimumSize || len(raw) < ipHdrLen {
		return 0, &net.OpError{Op: "read", Err: net.ErrClosed}
	}

	n := copy(b, raw[ipHdrLen:])
	c.logf("icmp: recv %d bytes ICMP reply from %s", n, c.dst)
	return n, nil
}
// Close closes the underlying endpoint the first time it is called; later
// calls do nothing. It always returns nil.
func (c *rawPingConn) Close() error {
	if !c.closed {
		c.closed = true
		c.ep.Close()
	}
	return nil
}
// LocalAddr reports the conn's source address as a pingAddr.
func (c *rawPingConn) LocalAddr() net.Addr {
	return pingAddr{c.src}
}

// RemoteAddr reports the conn's destination address as a pingAddr.
func (c *rawPingConn) RemoteAddr() net.Addr {
	return pingAddr{c.dst}
}

// SetDeadline is a no-op; deadlines are not supported.
func (c *rawPingConn) SetDeadline(t time.Time) error {
	return nil
}

// SetReadDeadline is a no-op; deadlines are not supported.
func (c *rawPingConn) SetReadDeadline(t time.Time) error {
	return nil
}

// SetWriteDeadline is a no-op; deadlines are not supported.
func (c *rawPingConn) SetWriteDeadline(t time.Time) error {
	return nil
}

// Compile-time check that rawPingConn satisfies net.Conn.
var _ net.Conn = (*rawPingConn)(nil)
// NewRawEndpoint builds a rawPingConn from src to dst on the forwarder's
// stack, using the forwarder's own logger. It simply delegates to
// newRawPingConn and is exported so code outside this file can obtain one.
func (f *ICMPForwarder) NewRawEndpoint(src, dst netip.Addr) (*rawPingConn, error) {
	conn, err := newRawPingConn(f, src, dst, f.logf)
	return conn, err
}
// Stack exposes the gvisor stack this forwarder was created with.
func (f *ICMPForwarder) Stack() *stack.Stack {
	return f.stack
}

View File

@@ -30,7 +30,6 @@ type Outbound struct {
wgDevice *device.Device
tnet *netstack.Net
funnel *FunnelServer
icmpFwd *ICMPForwarder
selfAddrs []netip.Prefix
peers map[key.NodePublic]*tailcfg.Node
@@ -304,16 +303,6 @@ func (o *Outbound) createWireGuard(ctx context.Context) error {
}
o.logf("outbound: WireGuard device up")
// Setup ICMP forwarder on the gvisor stack for proper ping support.
// EnableRawSockets is needed because CreateNetTUN doesn't set RawFactory.
if o.tnet != nil {
if gs := ExtractGVisorStack(o.tnet); gs != nil {
EnableRawSockets(gs)
o.icmpFwd = NewICMPForwarder(gs, o.logf)
o.logf("outbound: ICMP forwarder ready (raw sockets enabled)")
}
}
return nil
}

View File

@@ -111,6 +111,11 @@ func searchString(s, substr string) bool {
return false
}
// pingAddr is a net.Addr for ICMP ping endpoints, backed by a netip.Addr.
type pingAddr struct {
	addr netip.Addr
}

// Network always reports "ping4".
func (p pingAddr) Network() string {
	return "ping4"
}

// String returns the textual form of the wrapped address.
func (p pingAddr) String() string {
	return p.addr.String()
}
// Close marks the conn as closed. It always returns nil.
func (c *spoofPingConn) Close() error {
	c.closed = true
	return nil
}

// LocalAddr returns a pingAddr wrapping the zero netip.Addr; the conn does
// not report a real local address.
func (c *spoofPingConn) LocalAddr() net.Addr {
	var unset netip.Addr
	return pingAddr{unset}
}

// RemoteAddr returns the conn's destination as a pingAddr.
func (c *spoofPingConn) RemoteAddr() net.Addr {
	return pingAddr{c.dst}
}

View File

@@ -1,36 +0,0 @@
package tailnet
import (
"reflect"
"unsafe"
"golang.zx2c4.com/wireguard/tun/netstack"
"gvisor.dev/gvisor/pkg/tcpip/stack"
"gvisor.dev/gvisor/pkg/tcpip/transport/raw"
)
// ExtractGVisorStack returns the *stack.Stack held in the unexported "stack"
// field of a wireguard-go netstack.Net, read via reflection because Net does
// not export it.
//
// It returns nil when tnet is nil, when Net has no field named "stack", or
// when that field is not of type *stack.Stack (e.g. after an upstream
// layout change). The type check keeps the unsafe cast from reinterpreting
// unrelated memory.
func ExtractGVisorStack(tnet *netstack.Net) *stack.Stack {
	if tnet == nil {
		return nil
	}
	f := reflect.ValueOf(tnet).Elem().FieldByName("stack")
	if !f.IsValid() || f.Type() != reflect.TypeOf((*stack.Stack)(nil)) {
		return nil
	}
	// UnsafePointer reads the pointer value without the exported-field
	// restriction that Interface() enforces, and avoids a uintptr round-trip.
	return (*stack.Stack)(f.UnsafePointer())
}
// EnableRawSockets overwrites the unexported "rawFactory" field of a gvisor
// stack with a *raw.EndpointFactory, using reflection and unsafe. Per the
// original author's note, stacks built by CreateNetTUN leave that field
// unset, which makes NewRawEndpoint fail with "operation not permitted".
//
// The call is a silent no-op when s is nil, when the field does not exist,
// or when the field is not of type stack.RawFactory. The type check stops
// the unsafe store from corrupting memory if gvisor's layout changes.
//
// NOTE(review): the store is not synchronised with the stack; presumably it
// must run before the stack is used concurrently — confirm at the call site.
func EnableRawSockets(s *stack.Stack) {
	if s == nil {
		return
	}
	f := reflect.ValueOf(s).Elem().FieldByName("rawFactory")
	if !f.IsValid() {
		return
	}
	if f.Type() != reflect.TypeOf((*stack.RawFactory)(nil)).Elem() {
		return
	}
	// The field is unexported, so reflect's Set is unavailable; write the
	// interface value through the field's address instead.
	ptr := unsafe.Pointer(f.UnsafeAddr())
	*(*stack.RawFactory)(ptr) = &raw.EndpointFactory{}
}