Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: specify telemetry hostname and instance name as prometheus labels #4089

Merged
merged 4 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion daemon/algod/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes
fmt.Fprintln(logWriter, "Logging Starting")
if s.log.GetTelemetryUploadingEnabled() {
// May or may not be logging to node.log
fmt.Fprintf(logWriter, "Telemetry Enabled: %s\n", s.log.GetTelemetryHostName())
fmt.Fprintf(logWriter, "Telemetry Enabled: %s\n", s.log.GetTelemetryGUID())
fmt.Fprintf(logWriter, "Session: %s\n", s.log.GetTelemetrySession())
} else {
// May or may not be logging to node.log
Expand All @@ -158,6 +158,12 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes
metricLabels := map[string]string{}
if s.log.GetTelemetryEnabled() {
metricLabels["telemetry_session"] = s.log.GetTelemetrySession()
if h := s.log.GetTelemetryGUID(); h != "" {
metricLabels["telemetry_host"] = h
}
if i := s.log.GetInstanceName(); i != "" {
metricLabels["telemetry_instance"] = i
}
}
s.metricCollector = metrics.MakeMetricService(
&metrics.ServiceConfig{
Expand Down
6 changes: 3 additions & 3 deletions logging/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ type Logger interface {
EventWithDetails(category telemetryspec.Category, identifier telemetryspec.Event, details interface{})
StartOperation(category telemetryspec.Category, identifier telemetryspec.Operation) TelemetryOperation
GetTelemetrySession() string
GetTelemetryHostName() string
GetTelemetryGUID() string
GetInstanceName() string
GetTelemetryURI() string
CloseTelemetry()
Expand Down Expand Up @@ -401,11 +401,11 @@ func (l logger) GetTelemetryVersion() string {
return l.loggerState.telemetry.telemetryConfig.Version
}

func (l logger) GetTelemetryHostName() string {
func (l logger) GetTelemetryGUID() string {
if !l.GetTelemetryEnabled() {
return ""
}
return l.loggerState.telemetry.telemetryConfig.getHostName()
return l.loggerState.telemetry.telemetryConfig.getHostGUID()
}

func (l logger) GetInstanceName() string {
Expand Down
10 changes: 5 additions & 5 deletions logging/telemetryConfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,13 @@ func (cfg TelemetryConfig) Save(configPath string) error {
return err
}

// getHostName returns the HostName for telemetry (GUID:Name -- :Name is optional if blank)
func (cfg TelemetryConfig) getHostName() string {
hostName := cfg.GUID
// getHostGUID returns the host GUID for telemetry, formatted as GUID:Name (the :Name suffix is appended only when telemetry is enabled and Name is non-empty)
func (cfg TelemetryConfig) getHostGUID() string {
ret := cfg.GUID
if cfg.Enable && len(cfg.Name) > 0 {
hostName += ":" + cfg.Name
ret += ":" + cfg.Name
}
return hostName
return ret
}

// getInstanceName allows us to distinguish between multiple instances running on the same node.
Expand Down
2 changes: 1 addition & 1 deletion logging/telemetryhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func createElasticHook(cfg TelemetryConfig) (hook logrus.Hook, err error) {
err = fmt.Errorf("Unable to create new elastic client on '%s' using '%s:%s' : %w", cfg.URI, cfg.UserName, cfg.Password, err)
return nil, err
}
hostName := cfg.getHostName()
hostName := cfg.getHostGUID()
hook, err = elogrus.NewElasticHook(client, hostName, cfg.MinLogLevel, cfg.ChainID)

if err != nil {
Expand Down
22 changes: 11 additions & 11 deletions logging/telemetryspec/event.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,10 @@ const ConnectPeerEvent Event = "ConnectPeer"

// PeerEventDetails contains details for the ConnectPeerEvent
type PeerEventDetails struct {
Address string
HostName string
Incoming bool
InstanceName string
Address string
TelemetryGUID string `json:"HostName"`
Incoming bool
InstanceName string
// Endpoint is the dialed-to address for an outgoing connection. Not used for incoming connections.
Endpoint string `json:",omitempty"`
// MessageDelay is the average relative message delay. Not used for incoming connections.
Expand All @@ -206,11 +206,11 @@ const ConnectPeerFailEvent Event = "ConnectPeerFail"

// ConnectPeerFailEventDetails contains details for the ConnectPeerFailEvent
type ConnectPeerFailEventDetails struct {
Address string
HostName string
Incoming bool
InstanceName string
Reason string
Address string
TelemetryGUID string `json:"HostName"`
Incoming bool
InstanceName string
Reason string
}

// DisconnectPeerEvent event
Expand Down Expand Up @@ -282,8 +282,8 @@ type PeersConnectionDetails struct {
type PeerConnectionDetails struct {
// Address is the IP address of the remote connected socket
Address string
// The HostName is the TelemetryGUID passed via the X-Algorand-TelId header during the http connection handshake.
HostName string
// TelemetryGUID is the peer's telemetry GUID, passed via the X-Algorand-TelId header during the http connection handshake. It is serialized under the JSON key "HostName".
TelemetryGUID string `json:"HostName"`
// InstanceName is the node-specific hashed instance name that was passed via X-Algorand-InstanceName header during the http connection handshake.
InstanceName string
// ConnectionDuration is the duration of the connection, in seconds.
Expand Down
10 changes: 5 additions & 5 deletions network/requestTracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -482,11 +482,11 @@ func (rt *RequestTracker) ServeHTTP(response http.ResponseWriter, request *http.
rt.log.With("connection", "http").With("count", originConnections).Debugf("Rejected connection due to excessive connections attempt rate")
rt.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerFailEvent,
telemetryspec.ConnectPeerFailEventDetails{
Address: trackedRequest.remoteHost,
HostName: trackedRequest.otherTelemetryGUID,
Incoming: true,
InstanceName: trackedRequest.otherInstanceName,
Reason: "Remote IP Connection Rate Limit",
Address: trackedRequest.remoteHost,
TelemetryGUID: trackedRequest.otherTelemetryGUID,
Incoming: true,
InstanceName: trackedRequest.otherInstanceName,
Reason: "Remote IP Connection Rate Limit",
})
response.Header().Add(TooManyRequestsRetryAfterHeader, fmt.Sprintf("%d", rt.config.ConnectionsRateLimitingWindowSeconds))
response.WriteHeader(http.StatusTooManyRequests)
Expand Down
50 changes: 25 additions & 25 deletions network/wsNetwork.go
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,7 @@ func (wn *WebsocketNetwork) ClearHandlers() {
}

func (wn *WebsocketNetwork) setHeaders(header http.Header) {
localTelemetryGUID := wn.log.GetTelemetryHostName()
localTelemetryGUID := wn.log.GetTelemetryGUID()
localInstanceName := wn.log.GetInstanceName()
header.Set(TelemetryIDHeader, localTelemetryGUID)
header.Set(InstanceNameHeader, localInstanceName)
Expand Down Expand Up @@ -970,11 +970,11 @@ func (wn *WebsocketNetwork) checkIncomingConnectionLimits(response http.Response
networkConnectionsDroppedTotal.Inc(map[string]string{"reason": "incoming_connection_limit"})
wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerFailEvent,
telemetryspec.ConnectPeerFailEventDetails{
Address: remoteHost,
HostName: otherTelemetryGUID,
Incoming: true,
InstanceName: otherInstanceName,
Reason: "Connection Limit",
Address: remoteHost,
TelemetryGUID: otherTelemetryGUID,
Incoming: true,
InstanceName: otherInstanceName,
Reason: "Connection Limit",
})
response.WriteHeader(http.StatusServiceUnavailable)
return http.StatusServiceUnavailable
Expand All @@ -985,11 +985,11 @@ func (wn *WebsocketNetwork) checkIncomingConnectionLimits(response http.Response
networkConnectionsDroppedTotal.Inc(map[string]string{"reason": "incoming_connection_per_ip_limit"})
wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerFailEvent,
telemetryspec.ConnectPeerFailEventDetails{
Address: remoteHost,
HostName: otherTelemetryGUID,
Incoming: true,
InstanceName: otherInstanceName,
Reason: "Remote IP Connection Limit",
Address: remoteHost,
TelemetryGUID: otherTelemetryGUID,
Incoming: true,
InstanceName: otherInstanceName,
Reason: "Remote IP Connection Limit",
})
response.WriteHeader(http.StatusServiceUnavailable)
return http.StatusServiceUnavailable
Expand Down Expand Up @@ -1154,10 +1154,10 @@ func (wn *WebsocketNetwork) ServeHTTP(response http.ResponseWriter, request *htt
wn.log.With("event", "ConnectedIn").With("remote", trackedRequest.otherPublicAddr).With("local", localAddr).Infof("Accepted incoming connection from peer %s", trackedRequest.otherPublicAddr)
wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerEvent,
telemetryspec.PeerEventDetails{
Address: trackedRequest.remoteHost,
HostName: trackedRequest.otherTelemetryGUID,
Incoming: true,
InstanceName: trackedRequest.otherInstanceName,
Address: trackedRequest.remoteHost,
TelemetryGUID: trackedRequest.otherTelemetryGUID,
Incoming: true,
InstanceName: trackedRequest.otherInstanceName,
})

wn.maybeSendMessagesOfInterest(peer, nil)
Expand Down Expand Up @@ -1754,7 +1754,7 @@ func (wn *WebsocketNetwork) sendPeerConnectionsTelemetryStatus() {
for _, peer := range peers {
connDetail := telemetryspec.PeerConnectionDetails{
ConnectionDuration: uint(now.Sub(peer.createTime).Seconds()),
HostName: peer.TelemetryGUID,
TelemetryGUID: peer.TelemetryGUID,
InstanceName: peer.InstanceName,
}
if peer.outgoing {
Expand Down Expand Up @@ -2098,11 +2098,11 @@ func (wn *WebsocketNetwork) tryConnect(addr, gossipAddr string) {
wn.log.With("event", "ConnectedOut").With("remote", addr).With("local", localAddr).Infof("Made outgoing connection to peer %v", addr)
wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerEvent,
telemetryspec.PeerEventDetails{
Address: justHost(conn.RemoteAddr().String()),
HostName: peer.TelemetryGUID,
Incoming: false,
InstanceName: peer.InstanceName,
Endpoint: peer.GetAddress(),
Address: justHost(conn.RemoteAddr().String()),
TelemetryGUID: peer.TelemetryGUID,
Incoming: false,
InstanceName: peer.InstanceName,
Endpoint: peer.GetAddress(),
})

wn.maybeSendMessagesOfInterest(peer, nil)
Expand Down Expand Up @@ -2206,10 +2206,10 @@ func (wn *WebsocketNetwork) removePeer(peer *wsPeer, reason disconnectReason) {
}
}
eventDetails := telemetryspec.PeerEventDetails{
Address: peerAddr,
HostName: peer.TelemetryGUID,
Incoming: !peer.outgoing,
InstanceName: peer.InstanceName,
Address: peerAddr,
TelemetryGUID: peer.TelemetryGUID,
Incoming: !peer.outgoing,
InstanceName: peer.InstanceName,
}
if peer.outgoing {
eventDetails.Endpoint = peer.GetAddress()
Expand Down