Skip to content

Commit 280102c

Browse files
authored
metrics: make metrics easier to use with prometheus (#4020)
* make TagCounter metrics easier to use with prometheus
* ensure 0 counters are logged
* allow for pre-declaring TagCounter tags for use with prometheus
* fix expected in TestTagCounterWriteMetric
* deregister counter used in test
* fix lint warning
* CR comment
* Log incorrect metrics for debugging test failures
* deregister more counters and tagcounters used by tests
* remove unused Segment
1 parent 8088e04 commit 280102c

11 files changed

+111
-243
lines changed

agreement/gossip/network.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ import (
3232
)
3333

3434
var messagesHandledTotal = metrics.MakeCounter(metrics.AgreementMessagesHandled)
35-
var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement messages handled per type")
35+
var messagesHandledByType = metrics.NewTagCounter("algod_agreement_handled_{TAG}", "Number of agreement {TAG} messages handled",
36+
agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType)
3637
var messagesDroppedTotal = metrics.MakeCounter(metrics.AgreementMessagesDropped)
37-
var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement messages handled per type")
38+
var messagesDroppedByType = metrics.NewTagCounter("algod_agreement_dropped_{TAG}", "Number of agreement {TAG} messages dropped",
39+
agreementVoteMessageType, agreementProposalMessageType, agreementBundleMessageType)
3840

3941
const (
4042
agreementVoteMessageType = "vote"

agreement/pseudonode.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ var errPseudonodeVerifierClosedChannel = errors.New("crypto verifier closed the
4444
var errPseudonodeNoVotes = errors.New("no valid participation keys to generate votes for given round")
4545
var errPseudonodeNoProposals = errors.New("no valid participation keys to generate proposals for given round")
4646

47-
var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode tasks dropped per type")
48-
var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode task result timeouts per type")
47+
var pseudonodeBacklogFullByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_dropped_{TAG}", "Number of pseudonode {TAG} tasks dropped", "proposal", "vote")
48+
var pseudonodeResultTimeoutsByType = metrics.NewTagCounter("algod_agreement_pseudonode_tasks_timeouts_{TAG}", "Number of pseudonode {TAG} task result timeouts", "vote", "pvote", "ppayload")
4949

5050
// A pseudonode creates proposals and votes with a KeyManager which holds participation keys.
5151
//

network/wsPeer.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ const averageMessageLength = 2 * 1024 // Most of the messages are smaller tha
4848
const msgsInReadBufferPerPeer = 10
4949

5050
var networkSentBytesTotal = metrics.MakeCounter(metrics.NetworkSentBytesTotal)
51-
var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network per message tag")
51+
var networkSentBytesByTag = metrics.NewTagCounter("algod_network_sent_bytes_{TAG}", "Number of bytes that were sent over the network for {TAG} messages")
5252
var networkReceivedBytesTotal = metrics.MakeCounter(metrics.NetworkReceivedBytesTotal)
53-
var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network per message tag")
53+
var networkReceivedBytesByTag = metrics.NewTagCounter("algod_network_received_bytes_{TAG}", "Number of bytes that were received from the network for {TAG} messages")
5454

5555
var networkMessageReceivedTotal = metrics.MakeCounter(metrics.NetworkMessageReceivedTotal)
56-
var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network per message tag")
56+
var networkMessageReceivedByTag = metrics.NewTagCounter("algod_network_message_received_{TAG}", "Number of complete messages that were received from the network for {TAG} messages")
5757
var networkMessageSentTotal = metrics.MakeCounter(metrics.NetworkMessageSentTotal)
58-
var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network per message tag")
58+
var networkMessageSentByTag = metrics.NewTagCounter("algod_network_message_sent_{TAG}", "Number of complete messages that were sent to the network for {TAG} messages")
5959

6060
var networkConnectionsDroppedTotal = metrics.MakeCounter(metrics.NetworkConnectionsDroppedTotal)
6161
var networkMessageQueueMicrosTotal = metrics.MakeCounter(metrics.MetricName{Name: "algod_network_message_sent_queue_micros_total", Description: "Total microseconds message spent waiting in queue to be sent"})

util/metrics/counter.go

+11-3
Original file line numberDiff line numberDiff line change
@@ -155,16 +155,24 @@ func (counter *Counter) WriteMetric(buf *strings.Builder, parentLabels string) {
155155
counter.Lock()
156156
defer counter.Unlock()
157157

158-
if len(counter.values) < 1 {
159-
return
160-
}
161158
buf.WriteString("# HELP ")
162159
buf.WriteString(counter.name)
163160
buf.WriteString(" ")
164161
buf.WriteString(counter.description)
165162
buf.WriteString("\n# TYPE ")
166163
buf.WriteString(counter.name)
167164
buf.WriteString(" counter\n")
165+
// if counter is zero, report 0 using parentLabels and no tags
166+
if len(counter.values) == 0 {
167+
buf.WriteString(counter.name)
168+
if len(parentLabels) > 0 {
169+
buf.WriteString("{" + parentLabels + "}")
170+
}
171+
buf.WriteString(" 0")
172+
buf.WriteString("\n")
173+
return
174+
}
175+
// otherwise iterate through values and write one line per label
168176
for _, l := range counter.values {
169177
buf.WriteString(counter.name)
170178
buf.WriteString("{")

util/metrics/counter_test.go

+30-3
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package metrics
1919
import (
2020
"context"
2121
"fmt"
22+
"strings"
2223
"testing"
2324
"time"
2425

@@ -67,7 +68,7 @@ func TestMetricCounter(t *testing.T) {
6768
defer test.Unlock()
6869
// in the loop above we've created a single metric name with five different labels set (host0, host1 .. host4)
6970
// let's see if we received all the 5 different labels.
70-
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.")
71+
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
7172

7273
for k, v := range test.metrics {
7374
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
@@ -114,7 +115,7 @@ func TestMetricCounterFastInts(t *testing.T) {
114115
defer test.Unlock()
115116
// in the loop above we've created a single metric name with five different labels set (host0, host1 .. host4)
116117
// let's see if we received all the 5 different labels.
117-
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.")
118+
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
118119

119120
for k, v := range test.metrics {
120121
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
@@ -163,11 +164,37 @@ func TestMetricCounterMixed(t *testing.T) {
163164
defer test.Unlock()
164165
// in the loop above we've created a single metric name with five different labels set (host0, host1 .. host4)
165166
// let's see if we received all the 5 different labels.
166-
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported.")
167+
require.Equal(t, 1, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
167168

168169
for k, v := range test.metrics {
169170
// we have increased each one of the labels exactly 4 times. See that the counter was counting correctly.
170171
// (counters start at zero)
171172
require.Equal(t, "35.5", v, fmt.Sprintf("The metric '%s' reached value '%s'", k, v))
172173
}
173174
}
175+
176+
func TestCounterWriteMetric(t *testing.T) {
177+
partitiontest.PartitionTest(t)
178+
179+
c := MakeCounter(MetricName{Name: "testname", Description: "testhelp"})
180+
c.Deregister(nil)
181+
182+
// ensure 0 counters are still logged
183+
sbOut := strings.Builder{}
184+
c.WriteMetric(&sbOut, `host="myhost"`)
185+
expected := `# HELP testname testhelp
186+
# TYPE testname counter
187+
testname{host="myhost"} 0
188+
`
189+
require.Equal(t, expected, sbOut.String())
190+
191+
c.Add(2.3, nil)
192+
// ensure non-zero counters are logged
193+
sbOut = strings.Builder{}
194+
c.WriteMetric(&sbOut, `host="myhost"`)
195+
expected = `# HELP testname testhelp
196+
# TYPE testname counter
197+
testname{host="myhost"} 2.3
198+
`
199+
require.Equal(t, expected, sbOut.String())
200+
}

util/metrics/gauge_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ func TestMetricGauge(t *testing.T) {
6868

6969
// in the loop above we've created a single metric name with five different labels set (host0, host1 .. host4)
7070
// let's see if we received all the 5 different labels.
71-
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported.")
71+
require.Equal(t, 5, len(test.metrics), "Missing metric counts were reported: %+v", test.metrics)
7272

7373
// iterate through the metrics and check that each of the metrics reached its correct count.
7474
for k, v := range test.metrics {

util/metrics/registry_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ func TestWriteAdd(t *testing.T) {
3737
results := make(map[string]float64)
3838
DefaultRegistry().AddMetrics(results)
3939

40-
require.Equal(t, 2, len(results))
40+
require.Equal(t, 2, len(results), "results", results)
4141
require.Contains(t, results, "gauge-name")
4242
require.InDelta(t, 12.34, results["gauge-name"], 0.01)
4343
require.Contains(t, results, "label-counter_label__a_label_value_")

util/metrics/segment.go

-90
This file was deleted.

util/metrics/segment_test.go

-119
This file was deleted.

0 commit comments

Comments
 (0)