Skip to content

Commit 2af2472

Browse files
holisticode authored and zelig committed
swarm/network: Saturation check for healthy networks (ethereum#19071)
* swarm/network: new saturation for implementation
* swarm/network: re-added saturation func in Kademlia as it is used elsewhere
* swarm/network: saturation with higher MinBinSize
* swarm/network: PeersPerBin with depth check
* swarm/network: edited tests to pass new saturated check
* swarm/network: minor fix saturated check
* swarm/network/simulations/discovery: fixed renamed RPC call
* swarm/network: renamed to isSaturated and returns bool
* swarm/network: early depth check
1 parent fab8c5a commit 2af2472

File tree

4 files changed

+179
-29
lines changed

4 files changed

+179
-29
lines changed

swarm/network/kademlia.go

+64-10
Original file line number | Diff line number | Diff line change
@@ -628,7 +628,8 @@ func (k *Kademlia) string() string {
628628
// used for testing only
629629
// TODO move to separate testing tools file
630630
type PeerPot struct {
631-
NNSet [][]byte
631+
NNSet [][]byte
632+
PeersPerBin []int
632633
}
633634

634635
// NewPeerPotMap creates a map of pot record of *BzzAddr with keys
@@ -654,6 +655,7 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
654655

655656
// all nn-peers
656657
var nns [][]byte
658+
peersPerBin := make([]int, depth)
657659

658660
// iterate through the neighbours, going from the deepest to the shallowest
659661
np.EachNeighbour(a, Pof, func(val pot.Val, po int) bool {
@@ -667,14 +669,18 @@ func NewPeerPotMap(neighbourhoodSize int, addrs [][]byte) map[string]*PeerPot {
667669
// a neighbor is any peer in or deeper than the depth
668670
if po >= depth {
669671
nns = append(nns, addr)
670-
return true
672+
} else {
673+
// for peers < depth, we just count the number in each bin
674+
// the bin is the index of the slice
675+
peersPerBin[po]++
671676
}
672-
return false
677+
return true
673678
})
674679

675-
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s", addrs[i][:4], LogAddrs(nns)))
680+
log.Trace(fmt.Sprintf("%x PeerPotMap NNS: %s, peersPerBin", addrs[i][:4], LogAddrs(nns)))
676681
ppmap[common.Bytes2Hex(a)] = &PeerPot{
677-
NNSet: nns,
682+
NNSet: nns,
683+
PeersPerBin: peersPerBin,
678684
}
679685
}
680686
return ppmap
@@ -698,6 +704,39 @@ func (k *Kademlia) saturation() int {
698704
return prev
699705
}
700706

707+
// isSaturated returns true if the kademlia is considered saturated, or false if not.
708+
// It does this by collecting the unsaturated bins in a slice of ints called unsaturatedBins; each item in that slice
709+
// is the po of a bin which is unsaturated (number of connections < k.MinBinSize).
710+
// The bin is considered unsaturated only if there are actual peers in that PeerPot's bin (peersPerBin)
711+
// (if there is no peer for a given bin, then no connection could ever be established;
712+
// in a God's view this is relevant as no more peers will ever appear on that bin)
713+
func (k *Kademlia) isSaturated(peersPerBin []int, depth int) bool {
714+
// depth could be calculated from k but as this is called from `GetHealthInfo()`,
715+
// the depth has already been calculated so we can require it as a parameter
716+
717+
// early check for depth
718+
if depth != len(peersPerBin) {
719+
return false
720+
}
721+
unsaturatedBins := make([]int, 0)
722+
k.conns.EachBin(k.base, Pof, 0, func(po, size int, f func(func(val pot.Val) bool) bool) bool {
723+
724+
if po >= depth {
725+
return false
726+
}
727+
log.Trace("peers per bin", "peersPerBin[po]", peersPerBin[po], "po", po)
728+
// if there are actually peers in the PeerPot who can fulfill k.MinBinSize
729+
if size < k.MinBinSize && size < peersPerBin[po] {
730+
log.Trace("connections for po", "po", po, "size", size)
731+
unsaturatedBins = append(unsaturatedBins, po)
732+
}
733+
return true
734+
})
735+
736+
log.Trace("list of unsaturated bins", "unsaturatedBins", unsaturatedBins)
737+
return len(unsaturatedBins) == 0
738+
}
739+
701740
// knowNeighbours tests if all neighbours in the peerpot
702741
// are found among the peers known to the kademlia
703742
// It is used in Healthy function for testing only
@@ -780,19 +819,21 @@ type Health struct {
780819
ConnectNN bool // whether node is connected to all its neighbours
781820
CountConnectNN int // amount of neighbours connected to
782821
MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not
783-
Saturated bool // whether we are connected to all the peers we would have liked to
784-
Hive string
822+
// Saturated: true if every bin < depth either has at least MinBinSize connections or,
823+
// when it has fewer than MinBinSize, has as many connections as there are available peers in that bin
824+
Saturated bool
825+
Hive string
785826
}
786827

787-
// Healthy reports the health state of the kademlia connectivity
828+
// GetHealthInfo reports the health state of the kademlia connectivity
788829
//
789830
// The PeerPot argument provides an all-knowing view of the network
790831
// The resulting Health object is a result of comparisons between
791832
// what is the actual composition of the kademlia in question (the receiver), and
792833
// what SHOULD it have been when we take all we know about the network into consideration.
793834
//
794835
// used for testing only
795-
func (k *Kademlia) Healthy(pp *PeerPot) *Health {
836+
func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health {
796837
k.lock.RLock()
797838
defer k.lock.RUnlock()
798839
if len(pp.NNSet) < k.NeighbourhoodSize {
@@ -801,7 +842,10 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
801842
gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet)
802843
knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet)
803844
depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base)
804-
saturated := k.saturation() < depth
845+
846+
// check saturation
847+
saturated := k.isSaturated(pp.PeersPerBin, depth)
848+
805849
log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated))
806850
return &Health{
807851
KnowNN: knownn,
@@ -814,3 +858,13 @@ func (k *Kademlia) Healthy(pp *PeerPot) *Health {
814858
Hive: k.string(),
815859
}
816860
}
861+
862+
// Healthy returns the strict interpretation of `Healthy` given a `Health` struct
863+
// definition of strict health: all conditions must be true:
864+
// - we at least know one peer
865+
// - we know all neighbors
866+
// - we are connected to all known neighbors
867+
// - it is saturated
868+
func (h *Health) Healthy() bool {
869+
return h.KnowNN && h.ConnectNN && h.CountKnowNN > 0 && h.Saturated
870+
}

swarm/network/kademlia_test.go

+111-15
Original file line number | Diff line number | Diff line change
@@ -168,6 +168,46 @@ func TestNeighbourhoodDepth(t *testing.T) {
168168
testNum++
169169
}
170170

171+
// TestHighMinBinSize tests that the saturation function also works
172+
// if MinBinSize is > 2, the connection count is < k.MinBinSize
173+
// and there are more peers available than connected
174+
func TestHighMinBinSize(t *testing.T) {
175+
// a function to test for different MinBinSize values
176+
testKad := func(minBinSize int) {
177+
// create a test kademlia
178+
tk := newTestKademlia(t, "11111111")
179+
// set its MinBinSize to desired value
180+
tk.KadParams.MinBinSize = minBinSize
181+
182+
// add a couple of peers (so we have NN and depth)
183+
tk.On("00000000") // bin 0
184+
tk.On("11100000") // bin 3
185+
tk.On("11110000") // bin 4
186+
187+
first := "10000000" // add a first peer at bin 1
188+
tk.Register(first) // register it
189+
// we now have one registered peer at bin 1;
190+
// iterate and connect one peer at each iteration;
191+
// should be unhealthy until at minBinSize - 1
192+
// we connect the unconnected but registered peer
193+
for i := 1; i < minBinSize; i++ {
194+
peer := fmt.Sprintf("1000%b", 8|i)
195+
tk.On(peer)
196+
if i == minBinSize-1 {
197+
tk.On(first)
198+
tk.checkHealth(true)
199+
return
200+
}
201+
tk.checkHealth(false)
202+
}
203+
}
204+
// test MinBinSizes of 3 to 5
205+
testMinBinSizes := []int{3, 4, 5}
206+
for _, k := range testMinBinSizes {
207+
testKad(k)
208+
}
209+
}
210+
171211
// TestHealthStrict tests the simplest definition of health
172212
// Which means whether we are connected to all neighbors we know of
173213
func TestHealthStrict(t *testing.T) {
@@ -176,60 +216,116 @@ func TestHealthStrict(t *testing.T) {
176216
// no peers
177217
// unhealthy (and lonely)
178218
tk := newTestKademlia(t, "11111111")
179-
tk.checkHealth(false, false)
219+
tk.checkHealth(false)
180220

181221
// know one peer but not connected
182222
// unhealthy
183223
tk.Register("11100000")
184-
tk.checkHealth(false, false)
224+
tk.checkHealth(false)
185225

186226
// know one peer and connected
187-
// healthy
227+
// healthy: the only known peer is connected
188228
tk.On("11100000")
189-
tk.checkHealth(true, false)
229+
tk.checkHealth(true)
190230

191231
// know two peers, only one connected
192232
// unhealthy
193233
tk.Register("11111100")
194-
tk.checkHealth(false, false)
234+
tk.checkHealth(false)
195235

196236
// know two peers and connected to both
197237
// healthy
198238
tk.On("11111100")
199-
tk.checkHealth(true, false)
239+
tk.checkHealth(true)
200240

201241
// know three peers, connected to the two deepest
202242
// unhealthy: the newly registered peer is known but not connected
203243
tk.Register("00000000")
204-
tk.checkHealth(true, false)
244+
tk.checkHealth(false)
205245

206246
// know three peers, connected to all three
207247
// healthy
208248
tk.On("00000000")
209-
tk.checkHealth(true, false)
249+
tk.checkHealth(true)
210250

211251
// add fourth peer deeper than current depth
212252
// unhealthy
213253
tk.Register("11110000")
214-
tk.checkHealth(false, false)
254+
tk.checkHealth(false)
215255

216256
// connected to three deepest peers
217257
// healthy
218258
tk.On("11110000")
219-
tk.checkHealth(true, false)
259+
tk.checkHealth(true)
220260

221261
// add additional peer in same bin as deepest peer
222262
// unhealthy
223263
tk.Register("11111101")
224-
tk.checkHealth(false, false)
264+
tk.checkHealth(false)
225265

226266
// four deepest of five peers connected
227267
// healthy
228268
tk.On("11111101")
229-
tk.checkHealth(true, false)
269+
tk.checkHealth(true)
270+
271+
// add additional peer in bin 0
272+
// unhealthy: unsaturated bin 0, 2 known but 1 connected
273+
tk.Register("00000001")
274+
tk.checkHealth(false)
275+
276+
// Connect second in bin 0
277+
// healthy
278+
tk.On("00000001")
279+
tk.checkHealth(true)
280+
281+
// add peer in bin 1
282+
// unhealthy, as it is known but not connected
283+
tk.Register("10000000")
284+
tk.checkHealth(false)
285+
286+
// connect peer in bin 1
287+
// depth change, is now 1
288+
// healthy, 1 peer in bin 1 known and connected
289+
tk.On("10000000")
290+
tk.checkHealth(true)
291+
292+
// add second peer in bin 1
293+
// unhealthy, as it is known but not connected
294+
tk.Register("10000001")
295+
tk.checkHealth(false)
296+
297+
// connect second peer in bin 1
298+
// healthy,
299+
tk.On("10000001")
300+
tk.checkHealth(true)
301+
302+
// connect third peer in bin 1
303+
// healthy,
304+
tk.On("10000011")
305+
tk.checkHealth(true)
306+
307+
// add peer in bin 2
308+
// unhealthy, no depth change
309+
tk.Register("11000000")
310+
tk.checkHealth(false)
311+
312+
// connect peer in bin 2
313+
// depth change - as we already have peers in bin 3 and 4,
314+
// we have contiguous bins, no bin < po 5 is empty -> depth 5
315+
// healthy, every bin < depth has the max available peers,
316+
// even if they are < MinBinSize
317+
tk.On("11000000")
318+
tk.checkHealth(true)
319+
320+
// add second peer in bin 2
321+
// unhealthy, peer bin is below depth 5 but
322+
// has more available peers (2) than connected ones (1)
323+
// --> unsaturated
324+
tk.Register("11000011")
325+
tk.checkHealth(false)
230326
}
231327

232-
func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
328+
func (tk *testKademlia) checkHealth(expectHealthy bool) {
233329
tk.t.Helper()
234330
kid := common.Bytes2Hex(tk.BaseAddr())
235331
addrs := [][]byte{tk.BaseAddr()}
@@ -239,13 +335,13 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) {
239335
})
240336

241337
pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs)
242-
healthParams := tk.Healthy(pp[kid])
338+
healthParams := tk.GetHealthInfo(pp[kid])
243339

244340
// definition of health, all conditions must be true:
245341
// - we at least know one peer
246342
// - we know all neighbors
247343
// - we are connected to all known neighbors
248-
health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0
344+
health := healthParams.Healthy()
249345
if expectHealthy != health {
250346
tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String())
251347
}

swarm/network/simulation/kademlia.go

+1-1
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,7 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net
6464
addr := common.Bytes2Hex(k.BaseAddr())
6565
pp := ppmap[addr]
6666
//call GetHealthInfo
67-
h := k.Healthy(pp)
67+
h := k.GetHealthInfo(pp)
6868
//print info
6969
log.Debug(k.String())
7070
log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN)

swarm/network/simulations/discovery/discovery_test.go

+3-3
Original file line number | Diff line number | Diff line change
@@ -267,7 +267,7 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul
267267
}
268268

269269
healthy := &network.Health{}
270-
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
270+
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
271271
return false, fmt.Errorf("error getting node health: %s", err)
272272
}
273273
log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive))
@@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
352352
healthy := &network.Health{}
353353
addr := id.String()
354354
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
355-
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
355+
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
356356
return fmt.Errorf("error getting node health: %s", err)
357357
}
358358

@@ -422,7 +422,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt
422422
healthy := &network.Health{}
423423
ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs)
424424

425-
if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
425+
if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil {
426426
return false, fmt.Errorf("error getting node health: %s", err)
427427
}
428428
log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN))

0 commit comments

Comments
 (0)