From f50fdf22d78b95a1d06aa755c832f280dc7898ad Mon Sep 17 00:00:00 2001 From: chris erway Date: Tue, 7 Jun 2022 15:36:08 -0400 Subject: [PATCH 1/4] specify telemetry hostname in prometheus metrics, if available --- daemon/algod/server.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/daemon/algod/server.go b/daemon/algod/server.go index 07e206a816..6158b0c75c 100644 --- a/daemon/algod/server.go +++ b/daemon/algod/server.go @@ -158,6 +158,9 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes metricLabels := map[string]string{} if s.log.GetTelemetryEnabled() { metricLabels["telemetry_session"] = s.log.GetTelemetrySession() + if h := s.log.GetTelemetryHostName(); h != "" { + metricLabels["telemetry_host"] = h + } } s.metricCollector = metrics.MakeMetricService( &metrics.ServiceConfig{ From 89bb536779149b508a33e8951002217792650882 Mon Sep 17 00:00:00 2001 From: chris erway Date: Wed, 8 Jun 2022 15:34:00 -0400 Subject: [PATCH 2/4] also get telemetry InstanceName tag --- daemon/algod/server.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/daemon/algod/server.go b/daemon/algod/server.go index 6158b0c75c..567fd01d23 100644 --- a/daemon/algod/server.go +++ b/daemon/algod/server.go @@ -161,6 +161,9 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes if h := s.log.GetTelemetryHostName(); h != "" { metricLabels["telemetry_host"] = h } + if i := s.log.GetInstanceName(); i != "" { + metricLabels["telemetry_instance"] = i + } } s.metricCollector = metrics.MakeMetricService( &metrics.ServiceConfig{ From 8a45d7fc5b79f5bf8d7882a20fc87c4a4a9ded7d Mon Sep 17 00:00:00 2001 From: chris erway Date: Wed, 8 Jun 2022 16:02:37 -0400 Subject: [PATCH 3/4] rename GetTelemetryHostName to GetTelemetryHostID --- daemon/algod/server.go | 4 ++-- logging/log.go | 6 +++--- logging/telemetryConfig.go | 10 +++++----- logging/telemetryhook.go | 2 +- network/wsNetwork.go | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) 
diff --git a/daemon/algod/server.go b/daemon/algod/server.go index 567fd01d23..28fdfed6bd 100644 --- a/daemon/algod/server.go +++ b/daemon/algod/server.go @@ -147,7 +147,7 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes fmt.Fprintln(logWriter, "Logging Starting") if s.log.GetTelemetryUploadingEnabled() { // May or may not be logging to node.log - fmt.Fprintf(logWriter, "Telemetry Enabled: %s\n", s.log.GetTelemetryHostName()) + fmt.Fprintf(logWriter, "Telemetry Enabled: %s\n", s.log.GetTelemetryHostID()) fmt.Fprintf(logWriter, "Session: %s\n", s.log.GetTelemetrySession()) } else { // May or may not be logging to node.log @@ -158,7 +158,7 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes metricLabels := map[string]string{} if s.log.GetTelemetryEnabled() { metricLabels["telemetry_session"] = s.log.GetTelemetrySession() - if h := s.log.GetTelemetryHostName(); h != "" { + if h := s.log.GetTelemetryHostID(); h != "" { metricLabels["telemetry_host"] = h } if i := s.log.GetInstanceName(); i != "" { diff --git a/logging/log.go b/logging/log.go index 527d6decb6..43f11bb6b2 100644 --- a/logging/log.go +++ b/logging/log.go @@ -157,7 +157,7 @@ type Logger interface { EventWithDetails(category telemetryspec.Category, identifier telemetryspec.Event, details interface{}) StartOperation(category telemetryspec.Category, identifier telemetryspec.Operation) TelemetryOperation GetTelemetrySession() string - GetTelemetryHostName() string + GetTelemetryHostID() string GetInstanceName() string GetTelemetryURI() string CloseTelemetry() @@ -401,11 +401,11 @@ func (l logger) GetTelemetryVersion() string { return l.loggerState.telemetry.telemetryConfig.Version } -func (l logger) GetTelemetryHostName() string { +func (l logger) GetTelemetryHostID() string { if !l.GetTelemetryEnabled() { return "" } - return l.loggerState.telemetry.telemetryConfig.getHostName() + return l.loggerState.telemetry.telemetryConfig.getHostID() } 
func (l logger) GetInstanceName() string { diff --git a/logging/telemetryConfig.go b/logging/telemetryConfig.go index 0ef98e4502..e8becb7665 100644 --- a/logging/telemetryConfig.go +++ b/logging/telemetryConfig.go @@ -105,13 +105,13 @@ func (cfg TelemetryConfig) Save(configPath string) error { return err } -// getHostName returns the HostName for telemetry (GUID:Name -- :Name is optional if blank) -func (cfg TelemetryConfig) getHostName() string { - hostName := cfg.GUID +// getHostID returns the Host ID for telemetry (GUID:Name -- :Name is optional if blank) +func (cfg TelemetryConfig) getHostID() string { + hostID := cfg.GUID if cfg.Enable && len(cfg.Name) > 0 { - hostName += ":" + cfg.Name + hostID += ":" + cfg.Name } - return hostName + return hostID } // getInstanceName allows us to distinguish between multiple instances running on the same node. diff --git a/logging/telemetryhook.go b/logging/telemetryhook.go index 1a8c297290..146d8ca8ec 100644 --- a/logging/telemetryhook.go +++ b/logging/telemetryhook.go @@ -242,7 +242,7 @@ func createElasticHook(cfg TelemetryConfig) (hook logrus.Hook, err error) { err = fmt.Errorf("Unable to create new elastic client on '%s' using '%s:%s' : %w", cfg.URI, cfg.UserName, cfg.Password, err) return nil, err } - hostName := cfg.getHostName() + hostName := cfg.getHostID() hook, err = elogrus.NewElasticHook(client, hostName, cfg.MinLogLevel, cfg.ChainID) if err != nil { diff --git a/network/wsNetwork.go b/network/wsNetwork.go index 68394a924c..d67a1e8c25 100644 --- a/network/wsNetwork.go +++ b/network/wsNetwork.go @@ -917,7 +917,7 @@ func (wn *WebsocketNetwork) ClearHandlers() { } func (wn *WebsocketNetwork) setHeaders(header http.Header) { - localTelemetryGUID := wn.log.GetTelemetryHostName() + localTelemetryGUID := wn.log.GetTelemetryHostID() localInstanceName := wn.log.GetInstanceName() header.Set(TelemetryIDHeader, localTelemetryGUID) header.Set(InstanceNameHeader, localInstanceName) From e581ebb6b725b17126378c00ee2f7f481f7e3532 
Mon Sep 17 00:00:00 2001 From: chris erway Date: Wed, 8 Jun 2022 16:11:33 -0400 Subject: [PATCH 4/4] rename telemetry HostName to TelemetryGUID --- daemon/algod/server.go | 4 +-- logging/log.go | 6 ++-- logging/telemetryConfig.go | 10 +++---- logging/telemetryhook.go | 2 +- logging/telemetryspec/event.go | 22 +++++++-------- network/requestTracker.go | 10 +++---- network/wsNetwork.go | 50 +++++++++++++++++----------------- 7 files changed, 52 insertions(+), 52 deletions(-) diff --git a/daemon/algod/server.go b/daemon/algod/server.go index 28fdfed6bd..c423e8de2d 100644 --- a/daemon/algod/server.go +++ b/daemon/algod/server.go @@ -147,7 +147,7 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes fmt.Fprintln(logWriter, "Logging Starting") if s.log.GetTelemetryUploadingEnabled() { // May or may not be logging to node.log - fmt.Fprintf(logWriter, "Telemetry Enabled: %s\n", s.log.GetTelemetryHostID()) + fmt.Fprintf(logWriter, "Telemetry Enabled: %s\n", s.log.GetTelemetryGUID()) fmt.Fprintf(logWriter, "Session: %s\n", s.log.GetTelemetrySession()) } else { // May or may not be logging to node.log @@ -158,7 +158,7 @@ func (s *Server) Initialize(cfg config.Local, phonebookAddresses []string, genes metricLabels := map[string]string{} if s.log.GetTelemetryEnabled() { metricLabels["telemetry_session"] = s.log.GetTelemetrySession() - if h := s.log.GetTelemetryHostID(); h != "" { + if h := s.log.GetTelemetryGUID(); h != "" { metricLabels["telemetry_host"] = h } if i := s.log.GetInstanceName(); i != "" { diff --git a/logging/log.go b/logging/log.go index 43f11bb6b2..d0384d0a8c 100644 --- a/logging/log.go +++ b/logging/log.go @@ -157,7 +157,7 @@ type Logger interface { EventWithDetails(category telemetryspec.Category, identifier telemetryspec.Event, details interface{}) StartOperation(category telemetryspec.Category, identifier telemetryspec.Operation) TelemetryOperation GetTelemetrySession() string - GetTelemetryHostID() string + GetTelemetryGUID() 
string GetInstanceName() string GetTelemetryURI() string CloseTelemetry() @@ -401,11 +401,11 @@ func (l logger) GetTelemetryVersion() string { return l.loggerState.telemetry.telemetryConfig.Version } -func (l logger) GetTelemetryHostID() string { +func (l logger) GetTelemetryGUID() string { if !l.GetTelemetryEnabled() { return "" } - return l.loggerState.telemetry.telemetryConfig.getHostID() + return l.loggerState.telemetry.telemetryConfig.getHostGUID() } func (l logger) GetInstanceName() string { diff --git a/logging/telemetryConfig.go b/logging/telemetryConfig.go index e8becb7665..452202f919 100644 --- a/logging/telemetryConfig.go +++ b/logging/telemetryConfig.go @@ -105,13 +105,13 @@ func (cfg TelemetryConfig) Save(configPath string) error { return err } -// getHostID returns the Host ID for telemetry (GUID:Name -- :Name is optional if blank) -func (cfg TelemetryConfig) getHostID() string { - hostID := cfg.GUID +// getHostGUID returns the Host GUID for telemetry (GUID:Name -- :Name is optional if blank) +func (cfg TelemetryConfig) getHostGUID() string { + ret := cfg.GUID if cfg.Enable && len(cfg.Name) > 0 { - hostID += ":" + cfg.Name + ret += ":" + cfg.Name } - return hostID + return ret } // getInstanceName allows us to distinguish between multiple instances running on the same node. 
diff --git a/logging/telemetryhook.go b/logging/telemetryhook.go index 146d8ca8ec..b74d8a4475 100644 --- a/logging/telemetryhook.go +++ b/logging/telemetryhook.go @@ -242,7 +242,7 @@ func createElasticHook(cfg TelemetryConfig) (hook logrus.Hook, err error) { err = fmt.Errorf("Unable to create new elastic client on '%s' using '%s:%s' : %w", cfg.URI, cfg.UserName, cfg.Password, err) return nil, err } - hostName := cfg.getHostID() + hostName := cfg.getHostGUID() hook, err = elogrus.NewElasticHook(client, hostName, cfg.MinLogLevel, cfg.ChainID) if err != nil { diff --git a/logging/telemetryspec/event.go b/logging/telemetryspec/event.go index dcd3d231c3..81d2283241 100644 --- a/logging/telemetryspec/event.go +++ b/logging/telemetryspec/event.go @@ -191,10 +191,10 @@ const ConnectPeerEvent Event = "ConnectPeer" // PeerEventDetails contains details for the ConnectPeerEvent type PeerEventDetails struct { - Address string - HostName string - Incoming bool - InstanceName string + Address string + TelemetryGUID string `json:"HostName"` + Incoming bool + InstanceName string // Endpoint is the dialed-to address, for an outgoing connection. Not being used for incoming connection. Endpoint string `json:",omitempty"` // MessageDelay is the avarage relative message delay. Not being used for incoming connection. 
@@ -206,11 +206,11 @@ const ConnectPeerFailEvent Event = "ConnectPeerFail" // ConnectPeerFailEventDetails contains details for the ConnectPeerFailEvent type ConnectPeerFailEventDetails struct { - Address string - HostName string - Incoming bool - InstanceName string - Reason string + Address string + TelemetryGUID string `json:"HostName"` + Incoming bool + InstanceName string + Reason string } // DisconnectPeerEvent event @@ -282,8 +282,8 @@ type PeersConnectionDetails struct { type PeerConnectionDetails struct { // Address is the IP address of the remote connected socket Address string - // The HostName is the TelemetryGUID passed via the X-Algorand-TelId header during the http connection handshake. - HostName string + // TelemetryGUID is the peer's telemetry GUID, passed via the X-Algorand-TelId header during the http connection handshake. It is serialized under the JSON key "HostName". + TelemetryGUID string `json:"HostName"` // InstanceName is the node-specific hashed instance name that was passed via X-Algorand-InstanceName header during the http connection handshake. InstanceName string // ConnectionDuration is the duration of the connection, in seconds. diff --git a/network/requestTracker.go b/network/requestTracker.go index 13cb2f2054..fd78dadca1 100644 --- a/network/requestTracker.go +++ b/network/requestTracker.go @@ -482,11 +482,11 @@ func (rt *RequestTracker) ServeHTTP(response http.ResponseWriter, request *http. 
rt.log.With("connection", "http").With("count", originConnections).Debugf("Rejected connection due to excessive connections attempt rate") rt.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerFailEvent, telemetryspec.ConnectPeerFailEventDetails{ - Address: trackedRequest.remoteHost, - HostName: trackedRequest.otherTelemetryGUID, - Incoming: true, - InstanceName: trackedRequest.otherInstanceName, - Reason: "Remote IP Connection Rate Limit", + Address: trackedRequest.remoteHost, + TelemetryGUID: trackedRequest.otherTelemetryGUID, + Incoming: true, + InstanceName: trackedRequest.otherInstanceName, + Reason: "Remote IP Connection Rate Limit", }) response.Header().Add(TooManyRequestsRetryAfterHeader, fmt.Sprintf("%d", rt.config.ConnectionsRateLimitingWindowSeconds)) response.WriteHeader(http.StatusTooManyRequests) diff --git a/network/wsNetwork.go b/network/wsNetwork.go index d67a1e8c25..56ff515376 100644 --- a/network/wsNetwork.go +++ b/network/wsNetwork.go @@ -917,7 +917,7 @@ func (wn *WebsocketNetwork) ClearHandlers() { } func (wn *WebsocketNetwork) setHeaders(header http.Header) { - localTelemetryGUID := wn.log.GetTelemetryHostID() + localTelemetryGUID := wn.log.GetTelemetryGUID() localInstanceName := wn.log.GetInstanceName() header.Set(TelemetryIDHeader, localTelemetryGUID) header.Set(InstanceNameHeader, localInstanceName) @@ -970,11 +970,11 @@ func (wn *WebsocketNetwork) checkIncomingConnectionLimits(response http.Response networkConnectionsDroppedTotal.Inc(map[string]string{"reason": "incoming_connection_limit"}) wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerFailEvent, telemetryspec.ConnectPeerFailEventDetails{ - Address: remoteHost, - HostName: otherTelemetryGUID, - Incoming: true, - InstanceName: otherInstanceName, - Reason: "Connection Limit", + Address: remoteHost, + TelemetryGUID: otherTelemetryGUID, + Incoming: true, + InstanceName: otherInstanceName, + Reason: "Connection Limit", }) 
response.WriteHeader(http.StatusServiceUnavailable) return http.StatusServiceUnavailable @@ -985,11 +985,11 @@ func (wn *WebsocketNetwork) checkIncomingConnectionLimits(response http.Response networkConnectionsDroppedTotal.Inc(map[string]string{"reason": "incoming_connection_per_ip_limit"}) wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerFailEvent, telemetryspec.ConnectPeerFailEventDetails{ - Address: remoteHost, - HostName: otherTelemetryGUID, - Incoming: true, - InstanceName: otherInstanceName, - Reason: "Remote IP Connection Limit", + Address: remoteHost, + TelemetryGUID: otherTelemetryGUID, + Incoming: true, + InstanceName: otherInstanceName, + Reason: "Remote IP Connection Limit", }) response.WriteHeader(http.StatusServiceUnavailable) return http.StatusServiceUnavailable @@ -1154,10 +1154,10 @@ func (wn *WebsocketNetwork) ServeHTTP(response http.ResponseWriter, request *htt wn.log.With("event", "ConnectedIn").With("remote", trackedRequest.otherPublicAddr).With("local", localAddr).Infof("Accepted incoming connection from peer %s", trackedRequest.otherPublicAddr) wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerEvent, telemetryspec.PeerEventDetails{ - Address: trackedRequest.remoteHost, - HostName: trackedRequest.otherTelemetryGUID, - Incoming: true, - InstanceName: trackedRequest.otherInstanceName, + Address: trackedRequest.remoteHost, + TelemetryGUID: trackedRequest.otherTelemetryGUID, + Incoming: true, + InstanceName: trackedRequest.otherInstanceName, }) wn.maybeSendMessagesOfInterest(peer, nil) @@ -1754,7 +1754,7 @@ func (wn *WebsocketNetwork) sendPeerConnectionsTelemetryStatus() { for _, peer := range peers { connDetail := telemetryspec.PeerConnectionDetails{ ConnectionDuration: uint(now.Sub(peer.createTime).Seconds()), - HostName: peer.TelemetryGUID, + TelemetryGUID: peer.TelemetryGUID, InstanceName: peer.InstanceName, } if peer.outgoing { @@ -2098,11 +2098,11 @@ func (wn *WebsocketNetwork) tryConnect(addr, 
gossipAddr string) { wn.log.With("event", "ConnectedOut").With("remote", addr).With("local", localAddr).Infof("Made outgoing connection to peer %v", addr) wn.log.EventWithDetails(telemetryspec.Network, telemetryspec.ConnectPeerEvent, telemetryspec.PeerEventDetails{ - Address: justHost(conn.RemoteAddr().String()), - HostName: peer.TelemetryGUID, - Incoming: false, - InstanceName: peer.InstanceName, - Endpoint: peer.GetAddress(), + Address: justHost(conn.RemoteAddr().String()), + TelemetryGUID: peer.TelemetryGUID, + Incoming: false, + InstanceName: peer.InstanceName, + Endpoint: peer.GetAddress(), }) wn.maybeSendMessagesOfInterest(peer, nil) @@ -2206,10 +2206,10 @@ func (wn *WebsocketNetwork) removePeer(peer *wsPeer, reason disconnectReason) { } } eventDetails := telemetryspec.PeerEventDetails{ - Address: peerAddr, - HostName: peer.TelemetryGUID, - Incoming: !peer.outgoing, - InstanceName: peer.InstanceName, + Address: peerAddr, + TelemetryGUID: peer.TelemetryGUID, + Incoming: !peer.outgoing, + InstanceName: peer.InstanceName, } if peer.outgoing { eventDetails.Endpoint = peer.GetAddress()