forked from quic-go/quic-go
make Path MTU Discovery resilient to random packet loss (#4545)
This commit is contained in:
@@ -18,9 +18,7 @@ import (
|
||||
)
|
||||
|
||||
var _ = Describe("DPLPMTUD", func() {
|
||||
// This test is very sensitive to packet loss, as the loss of a single Path MTU probe packet makes DPLPMTUD
|
||||
// clip the assumed MTU at that value.
|
||||
It("discovers the MTU", FlakeAttempts(3), func() {
|
||||
It("discovers the MTU", func() {
|
||||
rtt := scaleDuration(5 * time.Millisecond)
|
||||
const mtu = 1400
|
||||
|
||||
|
||||
@@ -25,16 +25,80 @@ const (
|
||||
maxMTUDiff = 20
|
||||
// send a probe packet every mtuProbeDelay RTTs
|
||||
mtuProbeDelay = 5
|
||||
// Once maxLostMTUProbes MTU probe packets larger than a certain size are lost,
|
||||
// MTU discovery won't probe for larger MTUs than this size.
|
||||
// The algorithm used here is resilient to packet loss of (maxLostMTUProbes - 1) packets.
|
||||
maxLostMTUProbes = 3
|
||||
)
|
||||
|
||||
// The Path MTU is found by sending a larger packet every now and then.
|
||||
// If the packet is acknowledged, we conclude that the path supports this larger packet size.
|
||||
// If the packet is lost, this can mean one of two things:
|
||||
// 1. The path doesn't support this larger packet size, or
|
||||
// 2. The packet was lost due to packet loss, independent of its size.
|
||||
// The algorithm used here is resilient to packet loss of (maxLostMTUProbes - 1) packets.
|
||||
// For simplicty, the following example use maxLostMTUProbes = 2.
|
||||
//
|
||||
// Initialization:
|
||||
// |------------------------------------------------------------------------------|
|
||||
// min max
|
||||
//
|
||||
// The first MTU probe packet will have size (min+max)/2.
|
||||
// Assume that this packet is acknowledged. We can now move the min marker,
|
||||
// and continue the search in the resulting interval.
|
||||
//
|
||||
// If 1st probe packet acknowledged:
|
||||
// |---------------------------------------|--------------------------------------|
|
||||
// min max
|
||||
//
|
||||
// If 1st probe packet lost:
|
||||
// |---------------------------------------|--------------------------------------|
|
||||
// min lost[0] max
|
||||
//
|
||||
// We can't conclude that the path doesn't support this packet size, since the loss of the probe
|
||||
// packet could have been unrelated to the packet size. A larger probe packet will be sent later on.
|
||||
// After a loss, the next probe packet has size (min+lost[0])/2.
|
||||
// Now assume this probe packet is acknowledged:
|
||||
//
|
||||
// 2nd probe packet acknowledged:
|
||||
// |------------------|--------------------|--------------------------------------|
|
||||
// min lost[0] max
|
||||
//
|
||||
// First of all, we conclude that the path supports at least this MTU. That's progress!
|
||||
// Second, we probe a bit more aggressively with the next probe packet:
|
||||
// After an acknowledgement, the next probe packet has size (min+max)/2.
|
||||
// This means we'll send a packet larger than the first probe packet (which was lost).
|
||||
//
|
||||
// If 3rd probe packet acknowledged:
|
||||
// |-------------------------------------------------|----------------------------|
|
||||
// min max
|
||||
//
|
||||
// We can conclude that the loss of the 1st probe packet was not due to its size, and
|
||||
// continue searching in a much smaller interval now.
|
||||
//
|
||||
// If 3rd probe packet lost:
|
||||
// |------------------|--------------------|---------|----------------------------|
|
||||
// min lost[0] max
|
||||
//
|
||||
// Since in our example numPTOProbes = 2, and we lost 2 packets smaller than max, we
|
||||
// conclude that this packet size is not supported on the path, and reduce the maximum
|
||||
// value of the search interval.
|
||||
//
|
||||
// MTU discovery concludes once the interval min and max has been narrowed down to maxMTUDiff.
|
||||
|
||||
type mtuFinder struct {
|
||||
lastProbeTime time.Time
|
||||
mtuIncreased func(protocol.ByteCount)
|
||||
|
||||
rttStats *utils.RTTStats
|
||||
|
||||
inFlight protocol.ByteCount // the size of the probe packet currently in flight. InvalidByteCount if none is in flight
|
||||
current protocol.ByteCount
|
||||
max protocol.ByteCount // the maximum value, as advertised by the peer (or our maximum size buffer)
|
||||
min protocol.ByteCount
|
||||
limit protocol.ByteCount
|
||||
|
||||
// on initialization, we treat the maximum size as the first "lost" packet
|
||||
lost [maxLostMTUProbes]protocol.ByteCount
|
||||
lastProbeWasLost bool
|
||||
|
||||
tracer *logging.ConnectionTracer
|
||||
}
|
||||
@@ -47,29 +111,43 @@ func newMTUDiscoverer(
|
||||
mtuIncreased func(protocol.ByteCount),
|
||||
tracer *logging.ConnectionTracer,
|
||||
) *mtuFinder {
|
||||
return &mtuFinder{
|
||||
f := &mtuFinder{
|
||||
inFlight: protocol.InvalidByteCount,
|
||||
current: start,
|
||||
max: max,
|
||||
min: start,
|
||||
limit: max,
|
||||
rttStats: rttStats,
|
||||
mtuIncreased: mtuIncreased,
|
||||
tracer: tracer,
|
||||
}
|
||||
for i := range f.lost {
|
||||
if i == 0 {
|
||||
f.lost[i] = max
|
||||
continue
|
||||
}
|
||||
f.lost[i] = protocol.InvalidByteCount
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
func (f *mtuFinder) done() bool {
|
||||
return f.max-f.current <= maxMTUDiff+1
|
||||
return f.max()-f.min <= maxMTUDiff+1
|
||||
}
|
||||
|
||||
func (f *mtuFinder) max() protocol.ByteCount {
|
||||
for i, v := range f.lost {
|
||||
if v == protocol.InvalidByteCount {
|
||||
return f.lost[i-1]
|
||||
}
|
||||
}
|
||||
return f.lost[len(f.lost)-1]
|
||||
}
|
||||
|
||||
func (f *mtuFinder) Start() {
|
||||
if f.max == protocol.InvalidByteCount {
|
||||
panic("invalid")
|
||||
}
|
||||
f.lastProbeTime = time.Now() // makes sure the first probe packet is not sent immediately
|
||||
}
|
||||
|
||||
func (f *mtuFinder) ShouldSendProbe(now time.Time) bool {
|
||||
if f.max == 0 || f.lastProbeTime.IsZero() {
|
||||
if f.lastProbeTime.IsZero() {
|
||||
return false
|
||||
}
|
||||
if f.inFlight != protocol.InvalidByteCount || f.done() {
|
||||
@@ -79,7 +157,12 @@ func (f *mtuFinder) ShouldSendProbe(now time.Time) bool {
|
||||
}
|
||||
|
||||
func (f *mtuFinder) GetPing() (ackhandler.Frame, protocol.ByteCount) {
|
||||
size := (f.max + f.current) / 2
|
||||
var size protocol.ByteCount
|
||||
if f.lastProbeWasLost {
|
||||
size = (f.min + f.lost[0]) / 2
|
||||
} else {
|
||||
size = (f.min + f.max()) / 2
|
||||
}
|
||||
f.lastProbeTime = time.Now()
|
||||
f.inFlight = size
|
||||
return ackhandler.Frame{
|
||||
@@ -89,7 +172,7 @@ func (f *mtuFinder) GetPing() (ackhandler.Frame, protocol.ByteCount) {
|
||||
}
|
||||
|
||||
func (f *mtuFinder) CurrentSize() protocol.ByteCount {
|
||||
return f.current
|
||||
return f.min
|
||||
}
|
||||
|
||||
type mtuFinderAckHandler struct {
|
||||
@@ -104,7 +187,25 @@ func (h *mtuFinderAckHandler) OnAcked(wire.Frame) {
|
||||
panic("OnAcked callback called although there's no MTU probe packet in flight")
|
||||
}
|
||||
h.inFlight = protocol.InvalidByteCount
|
||||
h.current = size
|
||||
h.min = size
|
||||
h.lastProbeWasLost = false
|
||||
// remove all values smaller than size from the lost array
|
||||
var j int
|
||||
for i, v := range h.lost {
|
||||
if size < v {
|
||||
j = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if j > 0 {
|
||||
for i := 0; i < len(h.lost); i++ {
|
||||
if i+j < len(h.lost) {
|
||||
h.lost[i] = h.lost[i+j]
|
||||
} else {
|
||||
h.lost[i] = protocol.InvalidByteCount
|
||||
}
|
||||
}
|
||||
}
|
||||
if h.tracer != nil && h.tracer.UpdatedMTU != nil {
|
||||
h.tracer.UpdatedMTU(size, h.done())
|
||||
}
|
||||
@@ -116,6 +217,13 @@ func (h *mtuFinderAckHandler) OnLost(wire.Frame) {
|
||||
if size == protocol.InvalidByteCount {
|
||||
panic("OnLost callback called although there's no MTU probe packet in flight")
|
||||
}
|
||||
h.max = size
|
||||
h.lastProbeWasLost = true
|
||||
h.inFlight = protocol.InvalidByteCount
|
||||
for i, v := range h.lost {
|
||||
if size < v {
|
||||
copy(h.lost[i+1:], h.lost[i:])
|
||||
h.lost[i] = size
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
package quic
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"golang.org/x/exp/rand"
|
||||
|
||||
"github.com/quic-go/quic-go/internal/protocol"
|
||||
"github.com/quic-go/quic-go/internal/utils"
|
||||
"github.com/quic-go/quic-go/logging"
|
||||
@@ -25,6 +27,7 @@ var _ = Describe("MTU Discoverer", func() {
|
||||
now time.Time
|
||||
discoveredMTU protocol.ByteCount
|
||||
)
|
||||
r := rand.New(rand.NewSource(uint64(GinkgoRandomSeed())))
|
||||
|
||||
BeforeEach(func() {
|
||||
rttStats = &utils.RTTStats{}
|
||||
@@ -76,6 +79,7 @@ var _ = Describe("MTU Discoverer", func() {
|
||||
t := now.Add(5 * rtt)
|
||||
for d.ShouldSendProbe(t) {
|
||||
ping, size := d.GetPing()
|
||||
fmt.Println("sending", size)
|
||||
ping.Handler.OnAcked(ping.Frame)
|
||||
sizes = append(sizes, size)
|
||||
t = t.Add(5 * rtt)
|
||||
@@ -91,53 +95,107 @@ var _ = Describe("MTU Discoverer", func() {
|
||||
}
|
||||
})
|
||||
|
||||
It("finds the MTU", func() {
|
||||
const rep = 3000
|
||||
var maxDiff protocol.ByteCount
|
||||
for i := 0; i < rep; i++ {
|
||||
maxMTU := protocol.ByteCount(rand.Intn(int(3000-startMTU))) + startMTU + 1
|
||||
currentMTU := startMTU
|
||||
var tracedMTU protocol.ByteCount
|
||||
var tracerDone bool
|
||||
d := newMTUDiscoverer(
|
||||
rttStats,
|
||||
startMTU,
|
||||
maxMTU,
|
||||
func(s protocol.ByteCount) { currentMTU = s },
|
||||
&logging.ConnectionTracer{
|
||||
UpdatedMTU: func(mtu logging.ByteCount, done bool) {
|
||||
tracedMTU = mtu
|
||||
tracerDone = done
|
||||
},
|
||||
It("finds the MTU", MustPassRepeatedly(300), func() {
|
||||
maxMTU := protocol.ByteCount(r.Intn(int(3000-startMTU))) + startMTU + 1
|
||||
currentMTU := startMTU
|
||||
var tracedMTU protocol.ByteCount
|
||||
var tracerDone bool
|
||||
d := newMTUDiscoverer(
|
||||
rttStats,
|
||||
startMTU,
|
||||
maxMTU,
|
||||
func(s protocol.ByteCount) { currentMTU = s },
|
||||
&logging.ConnectionTracer{
|
||||
UpdatedMTU: func(mtu logging.ByteCount, done bool) {
|
||||
tracedMTU = mtu
|
||||
tracerDone = done
|
||||
},
|
||||
)
|
||||
d.Start()
|
||||
now := time.Now()
|
||||
realMTU := protocol.ByteCount(rand.Intn(int(maxMTU-startMTU))) + startMTU
|
||||
t := now.Add(mtuProbeDelay * rtt)
|
||||
var count int
|
||||
for d.ShouldSendProbe(t) {
|
||||
if count > 25 {
|
||||
Fail("too many iterations")
|
||||
}
|
||||
count++
|
||||
|
||||
ping, size := d.GetPing()
|
||||
if size <= realMTU {
|
||||
ping.Handler.OnAcked(ping.Frame)
|
||||
} else {
|
||||
ping.Handler.OnLost(ping.Frame)
|
||||
}
|
||||
t = t.Add(mtuProbeDelay * rtt)
|
||||
},
|
||||
)
|
||||
d.Start()
|
||||
now := time.Now()
|
||||
realMTU := protocol.ByteCount(r.Intn(int(maxMTU-startMTU))) + startMTU
|
||||
fmt.Fprintf(GinkgoWriter, "MTU: %d, max: %d\n", realMTU, maxMTU)
|
||||
t := now.Add(mtuProbeDelay * rtt)
|
||||
var probes []protocol.ByteCount
|
||||
for d.ShouldSendProbe(t) {
|
||||
if len(probes) > 24 {
|
||||
Fail(fmt.Sprintf("too many iterations: %v", probes))
|
||||
}
|
||||
diff := realMTU - currentMTU
|
||||
Expect(diff).To(BeNumerically(">=", 0))
|
||||
maxDiff = max(maxDiff, diff)
|
||||
if maxMTU > currentMTU+maxMTU {
|
||||
Expect(tracedMTU).To(Equal(currentMTU))
|
||||
Expect(tracerDone).To(BeTrue())
|
||||
ping, size := d.GetPing()
|
||||
probes = append(probes, size)
|
||||
if size <= realMTU {
|
||||
ping.Handler.OnAcked(ping.Frame)
|
||||
} else {
|
||||
ping.Handler.OnLost(ping.Frame)
|
||||
}
|
||||
t = t.Add(mtuProbeDelay * rtt)
|
||||
}
|
||||
Expect(maxDiff).To(BeEquivalentTo(maxMTUDiff))
|
||||
diff := realMTU - currentMTU
|
||||
Expect(diff).To(BeNumerically(">=", 0))
|
||||
if maxMTU > currentMTU+maxMTU {
|
||||
Expect(tracedMTU).To(Equal(currentMTU))
|
||||
Expect(tracerDone).To(BeTrue())
|
||||
}
|
||||
fmt.Fprintf(GinkgoWriter, "MTU discovered: %d (diff: %d)\n", currentMTU, diff)
|
||||
fmt.Fprintf(GinkgoWriter, "probes sent (%d): %v\n", len(probes), probes)
|
||||
Expect(diff).To(BeNumerically("<=", maxMTUDiff))
|
||||
})
|
||||
|
||||
const maxRandomLoss = maxLostMTUProbes - 1
|
||||
It(fmt.Sprintf("finds the MTU, with up to %d packets lost", maxRandomLoss), MustPassRepeatedly(500), func() {
|
||||
maxMTU := protocol.ByteCount(r.Intn(int(3000-startMTU))) + startMTU + 1
|
||||
currentMTU := startMTU
|
||||
var tracedMTU protocol.ByteCount
|
||||
var tracerDone bool
|
||||
d := newMTUDiscoverer(
|
||||
rttStats,
|
||||
startMTU,
|
||||
maxMTU,
|
||||
func(s protocol.ByteCount) { currentMTU = s },
|
||||
&logging.ConnectionTracer{
|
||||
UpdatedMTU: func(mtu logging.ByteCount, done bool) {
|
||||
tracedMTU = mtu
|
||||
tracerDone = done
|
||||
},
|
||||
},
|
||||
)
|
||||
d.Start()
|
||||
now := time.Now()
|
||||
realMTU := protocol.ByteCount(r.Intn(int(maxMTU-startMTU))) + startMTU
|
||||
fmt.Fprintf(GinkgoWriter, "MTU: %d, max: %d\n", realMTU, maxMTU)
|
||||
t := now.Add(mtuProbeDelay * rtt)
|
||||
var probes, randomLosses []protocol.ByteCount
|
||||
for d.ShouldSendProbe(t) {
|
||||
if len(probes) > 32 {
|
||||
Fail(fmt.Sprintf("too many iterations: %v", probes))
|
||||
}
|
||||
ping, size := d.GetPing()
|
||||
probes = append(probes, size)
|
||||
packetFits := size <= realMTU
|
||||
var acked bool
|
||||
if packetFits {
|
||||
randomLoss := r.Intn(maxLostMTUProbes) == 0 && len(randomLosses) < maxRandomLoss
|
||||
if randomLoss {
|
||||
randomLosses = append(randomLosses, size)
|
||||
} else {
|
||||
ping.Handler.OnAcked(ping.Frame)
|
||||
acked = true
|
||||
}
|
||||
}
|
||||
if !acked {
|
||||
ping.Handler.OnLost(ping.Frame)
|
||||
}
|
||||
t = t.Add(mtuProbeDelay * rtt)
|
||||
}
|
||||
diff := realMTU - currentMTU
|
||||
Expect(diff).To(BeNumerically(">=", 0))
|
||||
if maxMTU > currentMTU+maxMTU {
|
||||
Expect(tracedMTU).To(Equal(currentMTU))
|
||||
Expect(tracerDone).To(BeTrue())
|
||||
}
|
||||
fmt.Fprintf(GinkgoWriter, "MTU discovered with random losses %v: %d (diff: %d)\n", randomLosses, currentMTU, diff)
|
||||
fmt.Fprintf(GinkgoWriter, "probes sent (%d): %v\n", len(probes), probes)
|
||||
Expect(diff).To(BeNumerically("<=", maxMTUDiff))
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user