Skip to content

Commit 13292ee

Browse files
author
Ferenc Szabo
committed
p2p/simulations: encapsulate Node.Up field so we avoid data races
The Node.Up field was accessed concurrently without "proper" locking. There was a lock on Network and that was used sometimes to access the field. Other times the locking was missed and we had a data race. For example: ethereum#18464 The case above was solved, but there were still intermittent/hard to reproduce races. So let's solve the issue permanently. resolves: ethersphere/swarm#1146
1 parent 4b2f34c commit 13292ee

File tree

9 files changed

+63
-47
lines changed

9 files changed

+63
-47
lines changed

p2p/simulations/events.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ func ControlEvent(v interface{}) *Event {
100100
func (e *Event) String() string {
101101
switch e.Type {
102102
case EventTypeNode:
103-
return fmt.Sprintf("<node-event> id: %s up: %t", e.Node.ID().TerminalString(), e.Node.Up)
103+
return fmt.Sprintf("<node-event> id: %s up: %t", e.Node.ID().TerminalString(), e.Node.Up())
104104
case EventTypeConn:
105105
return fmt.Sprintf("<conn-event> nodes: %s->%s up: %t", e.Conn.One.TerminalString(), e.Conn.Other.TerminalString(), e.Conn.Up)
106106
case EventTypeMsg:

p2p/simulations/http_test.go

+10-8
Original file line numberDiff line numberDiff line change
@@ -421,14 +421,15 @@ type expectEvents struct {
421421
}
422422

423423
func (t *expectEvents) nodeEvent(id string, up bool) *Event {
424+
node := Node{
425+
Config: &adapters.NodeConfig{
426+
ID: enode.HexID(id),
427+
},
428+
up: up,
429+
}
424430
return &Event{
425431
Type: EventTypeNode,
426-
Node: &Node{
427-
Config: &adapters.NodeConfig{
428-
ID: enode.HexID(id),
429-
},
430-
Up: up,
431-
},
432+
Node: &node,
432433
}
433434
}
434435

@@ -480,6 +481,7 @@ loop:
480481
}
481482

482483
func (t *expectEvents) expect(events ...*Event) {
484+
t.Helper()
483485
timeout := time.After(10 * time.Second)
484486
i := 0
485487
for {
@@ -501,8 +503,8 @@ func (t *expectEvents) expect(events ...*Event) {
501503
if event.Node.ID() != expected.Node.ID() {
502504
t.Fatalf("expected node event %d to have id %q, got %q", i, expected.Node.ID().TerminalString(), event.Node.ID().TerminalString())
503505
}
504-
if event.Node.Up != expected.Node.Up {
505-
t.Fatalf("expected node event %d to have up=%t, got up=%t", i, expected.Node.Up, event.Node.Up)
506+
if event.Node.Up() != expected.Node.Up() {
507+
t.Fatalf("expected node event %d to have up=%t, got up=%t", i, expected.Node.Up(), event.Node.Up())
506508
}
507509

508510
case EventTypeConn:

p2p/simulations/mocker_test.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,12 @@ func TestMocker(t *testing.T) {
9090
for {
9191
select {
9292
case event := <-events:
93-
//if the event is a node Up event only
94-
if event.Node != nil && event.Node.Up {
93+
if isNodeUp(event) {
9594
//add the correspondent node ID to the map
9695
nodemap[event.Node.Config.ID] = true
9796
//this means all nodes got a nodeUp event, so we can continue the test
9897
if len(nodemap) == nodeCount {
9998
nodesComplete = true
100-
//wait for 3s as the mocker will need time to connect the nodes
101-
//time.Sleep( 3 *time.Second)
10299
}
103100
} else if event.Conn != nil && nodesComplete {
104101
connCount += 1
@@ -169,3 +166,7 @@ func TestMocker(t *testing.T) {
169166
t.Fatalf("Expected empty list of nodes, got: %d", len(nodesInfo))
170167
}
171168
}
169+
170+
func isNodeUp(event *Event) bool {
171+
return event.Node != nil && event.Node.Up()
172+
}

p2p/simulations/network.go

+32-19
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ func (net *Network) Config() *NetworkConfig {
136136
// StartAll starts all nodes in the network
137137
func (net *Network) StartAll() error {
138138
for _, node := range net.Nodes {
139-
if node.Up {
139+
if node.Up() {
140140
continue
141141
}
142142
if err := net.Start(node.ID()); err != nil {
@@ -149,7 +149,7 @@ func (net *Network) StartAll() error {
149149
// StopAll stops all nodes in the network
150150
func (net *Network) StopAll() error {
151151
for _, node := range net.Nodes {
152-
if !node.Up {
152+
if !node.Up() {
153153
continue
154154
}
155155
if err := net.Stop(node.ID()); err != nil {
@@ -174,7 +174,7 @@ func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte)
174174
net.lock.Unlock()
175175
return fmt.Errorf("node %v does not exist", id)
176176
}
177-
if node.Up {
177+
if node.Up() {
178178
net.lock.Unlock()
179179
return fmt.Errorf("node %v already up", id)
180180
}
@@ -184,7 +184,7 @@ func (net *Network) startWithSnapshots(id enode.ID, snapshots map[string][]byte)
184184
log.Warn("Node startup failed", "id", id, "err", err)
185185
return err
186186
}
187-
node.Up = true
187+
node.SetUp(true)
188188
log.Info("Started node", "id", id)
189189
ev := NewEvent(node)
190190
net.lock.Unlock()
@@ -219,7 +219,7 @@ func (net *Network) watchPeerEvents(id enode.ID, events chan *p2p.PeerEvent, sub
219219
if node == nil {
220220
return
221221
}
222-
node.Up = false
222+
node.SetUp(false)
223223
ev := NewEvent(node)
224224
net.events.Send(ev)
225225
}()
@@ -263,17 +263,17 @@ func (net *Network) Stop(id enode.ID) error {
263263
net.lock.Unlock()
264264
return fmt.Errorf("node %v does not exist", id)
265265
}
266-
if !node.Up {
266+
if !node.Up() {
267267
net.lock.Unlock()
268268
return fmt.Errorf("node %v already down", id)
269269
}
270-
node.Up = false
270+
node.SetUp(false)
271271
net.lock.Unlock()
272272

273273
err := node.Stop()
274274
if err != nil {
275275
net.lock.Lock()
276-
node.Up = true
276+
node.SetUp(true)
277277
net.lock.Unlock()
278278
return err
279279
}
@@ -430,7 +430,7 @@ func (net *Network) GetRandomUpNode(excludeIDs ...enode.ID) *Node {
430430

431431
func (net *Network) getUpNodeIDs() (ids []enode.ID) {
432432
for _, node := range net.Nodes {
433-
if node.Up {
433+
if node.Up() {
434434
ids = append(ids, node.ID())
435435
}
436436
}
@@ -446,7 +446,7 @@ func (net *Network) GetRandomDownNode(excludeIDs ...enode.ID) *Node {
446446

447447
func (net *Network) getDownNodeIDs() (ids []enode.ID) {
448448
for _, node := range net.GetNodes() {
449-
if !node.Up {
449+
if !node.Up() {
450450
ids = append(ids, node.ID())
451451
}
452452
}
@@ -595,8 +595,21 @@ type Node struct {
595595
// Config if the config used to created the node
596596
Config *adapters.NodeConfig `json:"config"`
597597

598-
// Up tracks whether or not the node is running
599-
Up bool `json:"up"`
598+
// up tracks whether or not the node is running
599+
up bool `json:"up"`
600+
upMu sync.RWMutex `json:"-"`
601+
}
602+
603+
func (n *Node) Up() bool {
604+
n.upMu.RLock()
605+
defer n.upMu.RUnlock()
606+
return n.up
607+
}
608+
609+
func (n *Node) SetUp(up bool) {
610+
n.upMu.Lock()
611+
defer n.upMu.Unlock()
612+
n.up = up
600613
}
601614

602615
// ID returns the ID of the node
@@ -630,7 +643,7 @@ func (n *Node) MarshalJSON() ([]byte, error) {
630643
}{
631644
Info: n.NodeInfo(),
632645
Config: n.Config,
633-
Up: n.Up,
646+
Up: n.Up(),
634647
})
635648
}
636649

@@ -653,10 +666,10 @@ type Conn struct {
653666

654667
// nodesUp returns whether both nodes are currently up
655668
func (c *Conn) nodesUp() error {
656-
if !c.one.Up {
669+
if !c.one.Up() {
657670
return fmt.Errorf("one %v is not up", c.One)
658671
}
659-
if !c.other.Up {
672+
if !c.other.Up() {
660673
return fmt.Errorf("other %v is not up", c.Other)
661674
}
662675
return nil
@@ -728,7 +741,7 @@ func (net *Network) snapshot(addServices []string, removeServices []string) (*Sn
728741
}
729742
for i, node := range net.Nodes {
730743
snap.Nodes[i] = NodeSnapshot{Node: *node}
731-
if !node.Up {
744+
if !node.Up() {
732745
continue
733746
}
734747
snapshots, err := node.Snapshots()
@@ -783,7 +796,7 @@ func (net *Network) Load(snap *Snapshot) error {
783796
if _, err := net.NewNodeWithConfig(n.Node.Config); err != nil {
784797
return err
785798
}
786-
if !n.Node.Up {
799+
if !n.Node.Up() {
787800
continue
788801
}
789802
if err := net.startWithSnapshots(n.Node.Config.ID, n.Snapshots); err != nil {
@@ -855,7 +868,7 @@ func (net *Network) Load(snap *Snapshot) error {
855868
// Start connecting.
856869
for _, conn := range snap.Conns {
857870

858-
if !net.GetNode(conn.One).Up || !net.GetNode(conn.Other).Up {
871+
if !net.GetNode(conn.One).Up() || !net.GetNode(conn.Other).Up() {
859872
//in this case, at least one of the nodes of a connection is not up,
860873
//so it would result in the snapshot `Load` to fail
861874
continue
@@ -909,7 +922,7 @@ func (net *Network) executeControlEvent(event *Event) {
909922
}
910923

911924
func (net *Network) executeNodeEvent(e *Event) error {
912-
if !e.Node.Up {
925+
if !e.Node.Up() {
913926
return net.Stop(e.Node.ID())
914927
}
915928

swarm/network/simulation/node.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func (s *Simulation) NodeIDs() (ids []enode.ID) {
4444
func (s *Simulation) UpNodeIDs() (ids []enode.ID) {
4545
nodes := s.Net.GetNodes()
4646
for _, node := range nodes {
47-
if node.Up {
47+
if node.Up() {
4848
ids = append(ids, node.ID())
4949
}
5050
}
@@ -55,7 +55,7 @@ func (s *Simulation) UpNodeIDs() (ids []enode.ID) {
5555
func (s *Simulation) DownNodeIDs() (ids []enode.ID) {
5656
nodes := s.Net.GetNodes()
5757
for _, node := range nodes {
58-
if !node.Up {
58+
if !node.Up() {
5959
ids = append(ids, node.ID())
6060
}
6161
}

swarm/network/simulation/node_test.go

+9-9
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ func TestUpDownNodeIDs(t *testing.T) {
5454
gotIDs = sim.UpNodeIDs()
5555

5656
for _, id := range gotIDs {
57-
if !sim.Net.GetNode(id).Up {
57+
if !sim.Net.GetNode(id).Up() {
5858
t.Errorf("node %s should not be down", id)
5959
}
6060
}
@@ -66,7 +66,7 @@ func TestUpDownNodeIDs(t *testing.T) {
6666
gotIDs = sim.DownNodeIDs()
6767

6868
for _, id := range gotIDs {
69-
if sim.Net.GetNode(id).Up {
69+
if sim.Net.GetNode(id).Up() {
7070
t.Errorf("node %s should not be up", id)
7171
}
7272
}
@@ -112,7 +112,7 @@ func TestAddNode(t *testing.T) {
112112
t.Fatal("node not found")
113113
}
114114

115-
if !n.Up {
115+
if !n.Up() {
116116
t.Error("node not started")
117117
}
118118
}
@@ -327,15 +327,15 @@ func TestStartStopNode(t *testing.T) {
327327
if n == nil {
328328
t.Fatal("node not found")
329329
}
330-
if !n.Up {
330+
if !n.Up() {
331331
t.Error("node not started")
332332
}
333333

334334
err = sim.StopNode(id)
335335
if err != nil {
336336
t.Fatal(err)
337337
}
338-
if n.Up {
338+
if n.Up() {
339339
t.Error("node not stopped")
340340
}
341341

@@ -345,7 +345,7 @@ func TestStartStopNode(t *testing.T) {
345345
if err != nil {
346346
t.Fatal(err)
347347
}
348-
if !n.Up {
348+
if !n.Up() {
349349
t.Error("node not started")
350350
}
351351
}
@@ -368,7 +368,7 @@ func TestStartStopRandomNode(t *testing.T) {
368368
if n == nil {
369369
t.Fatal("node not found")
370370
}
371-
if n.Up {
371+
if n.Up() {
372372
t.Error("node not stopped")
373373
}
374374

@@ -408,7 +408,7 @@ func TestStartStopRandomNodes(t *testing.T) {
408408
if n == nil {
409409
t.Fatal("node not found")
410410
}
411-
if n.Up {
411+
if n.Up() {
412412
t.Error("node not stopped")
413413
}
414414
}
@@ -425,7 +425,7 @@ func TestStartStopRandomNodes(t *testing.T) {
425425
if n == nil {
426426
t.Fatal("node not found")
427427
}
428-
if !n.Up {
428+
if !n.Up() {
429429
t.Error("node not started")
430430
}
431431
}

swarm/network/simulation/service.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func (s *Simulation) Services(name string) (services map[enode.ID]node.Service)
5252
nodes := s.Net.GetNodes()
5353
services = make(map[enode.ID]node.Service)
5454
for _, node := range nodes {
55-
if !node.Up {
55+
if !node.Up() {
5656
continue
5757
}
5858
simNode, ok := node.Node.(*adapters.SimNode)

swarm/network/simulation/simulation_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ func TestClose(t *testing.T) {
124124

125125
var upNodeCount int
126126
for _, n := range sim.Net.GetNodes() {
127-
if n.Up {
127+
if n.Up() {
128128
upNodeCount++
129129
}
130130
}
@@ -140,7 +140,7 @@ func TestClose(t *testing.T) {
140140

141141
upNodeCount = 0
142142
for _, n := range sim.Net.GetNodes() {
143-
if n.Up {
143+
if n.Up() {
144144
upNodeCount++
145145
}
146146
}

swarm/network/simulations/overlay_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ func watchSimEvents(net *simulations.Network, ctx context.Context, trigger chan
178178
case ev := <-events:
179179
//only catch node up events
180180
if ev.Type == simulations.EventTypeNode {
181-
if ev.Node.Up {
181+
if ev.Node.Up() {
182182
log.Debug("got node up event", "event", ev, "node", ev.Node.Config.ID)
183183
select {
184184
case trigger <- ev.Node.Config.ID:

0 commit comments

Comments
 (0)