Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ALFREDOPS-850 re-fetch trackers on each call #153

Merged
merged 1 commit into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 30 additions & 11 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,28 +21,46 @@ Version template:
-->

# Alfred Telemetry Changelog
## [0.10.1] - 2023-10-27
### Fixed
* Re-fetch trackers on each call [[#153]]

[#153]: https://github.com/xenit-eu/alfred-telemetry/pull/153

## [0.10.0] - 2023-08-31
### Added
* Added support for Alfresco 7.4 [#146]
* Added support for Alfresco 7.4 [[#146]]

[#146]: https://github.com/xenit-eu/alfred-telemetry/pull/146

## [0.9.3] - 2023-01-13
### Added
* Added TomcatMetrics [#142]
* Added TomcatMetrics [[#142]]

[#142]: https://github.com/xenit-eu/alfred-telemetry/pull/142

## [0.9.2] - 2022-12-23

### Fixed
* First call to Alfred Telemetry endpoint Solr always failing, Fix SolrSnapShot Metrics [#140]
* First call to Alfred Telemetry endpoint Solr always failing, Fix SolrSnapShot Metrics [[#140]]

[#140]: https://github.com/xenit-eu/alfred-telemetry/pull/140

## [0.9.1] - 2022-12-23

### Fixed
* Fix Hazelcast metrics with Micrometer [#137]
* Fix Hazelcast metrics with Micrometer [[#137]]

[#137]: https://github.com/xenit-eu/alfred-telemetry/pull/137

## [0.9.0] - 2022-12-01

### Fixed
* Fixes broken Apache Commons dbcp dependencies [#132]
* Fixes broken common tags not added to Alfresco's Prometheus registry [#134]
* Fixes broken Apache Commons dbcp dependencies [[#132]]
* Fixes broken common tags not added to Alfresco's Prometheus registry [[#134]]

[#132]: https://github.com/xenit-eu/alfred-telemetry/pull/132
[#134]: https://github.com/xenit-eu/alfred-telemetry/pull/134

### Added
* Added support for Alfresco 7.1, 7.2 and 7.3
Expand All @@ -54,11 +72,12 @@ Version template:

### BREAKING

* Alfred Telemetry declares `micrometer-core` and `micrometer-jvm-extras` as a provided-dependency [#129]
* Alfred Telemetry declares `micrometer-core` and `micrometer-jvm-extras` as a provided-dependency [[#129]]

[#129]: https://github.com/xenit-eu/alfred-telemetry/pull/129

### Fixed
* Fixes bug when tracker is explicitly disabled [#125]
* Fixes bug when tracker is explicitly disabled [[#125]]


## [0.7.2] - 2021-10-07
Expand All @@ -67,13 +86,13 @@ Version template:
* Added Common tags to Alfred Telemetry Solr

### Fixed
* Bug appearing when tracker is explicitly disabled [#125]
* Bug appearing when tracker is explicitly disabled [[#125]]

[#125]: https://github.com/xenit-eu/alfred-telemetry/pull/125

### Added
* Support more flexible Graphite step duration configuration [#123]
* Add metrics for solr backup [#124]
* Support more flexible Graphite step duration configuration [[#123]]
* Add metrics for solr backup [[#124]]

[#123]: https://github.com/xenit-eu/alfred-telemetry/pull/123
[#124]: https://github.com/xenit-eu/alfred-telemetry/pull/124
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package eu.xenit.alfred.telemetry.solr.monitoring.binder;

import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.binder.MeterBinder;
import org.alfresco.solr.AlfrescoCoreAdminHandler;
import org.alfresco.solr.tracker.TrackerRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Base class for Solr {@link MeterBinder}s that need the {@link TrackerRegistry}
 * exposed by the {@link AlfrescoCoreAdminHandler}.
 *
 * <p>{@link #bindTo(MeterRegistry)} stores the registry it is given and then delegates to
 * {@link #registerMetrics()}, which subclasses implement to register their meters.
 */
public abstract class AbstractSolrMetrics implements MeterBinder {
    private static final Logger logger = LoggerFactory.getLogger(AbstractSolrMetrics.class);

    /** Delay between two polls of the tracker registry while no core is listed yet. */
    private static final long TRACKER_POLL_DELAY_MILLIS = 10_000L;

    protected final AlfrescoCoreAdminHandler coreAdminHandler;
    protected MeterRegistry registry;

    /**
     * @param coreAdminHandler handler used to look up the tracker registry
     */
    AbstractSolrMetrics(AlfrescoCoreAdminHandler coreAdminHandler) {
        this.coreAdminHandler = coreAdminHandler;
    }

    /**
     * Fetches the tracker registry from the core admin handler, polling every 10 seconds
     * until it lists at least one core name.
     *
     * <p>If the waiting thread is interrupted, the interrupt flag is restored and the wait is
     * abandoned: the most recently fetched registry is returned even though it may still list
     * no cores. Previously the interruption was only logged and the loop kept running, which
     * made the wait impossible to cancel.
     *
     * @return the tracker registry; it lists at least one core unless the wait was interrupted
     */
    protected TrackerRegistry getTrackerRegistryWhenAvailable() {
        logger.info("Registering tracker metrics");
        TrackerRegistry trackerRegistry = coreAdminHandler.getTrackerRegistry();

        while (trackerRegistry.getCoreNames().isEmpty()) {
            logger.error("Solr did not start tracking yet, waiting 10sec");
            try {
                Thread.sleep(TRACKER_POLL_DELAY_MILLIS);
            } catch (InterruptedException e) {
                logger.error("Fail to wait 10 sec", e);
                // Restore the flag so callers up the stack can observe the interruption,
                // and leave the loop: a further sleep would throw again immediately.
                Thread.currentThread().interrupt();
                break;
            }
            // Re-fetch after each wait; the handler is asked again rather than reusing the old reference.
            trackerRegistry = coreAdminHandler.getTrackerRegistry();
        }
        return trackerRegistry;
    }

    /**
     * Remembers the given registry in {@link #registry} and registers this binder's metrics.
     *
     * @param registry the meter registry the metrics are registered with
     */
    @Override
    public void bindTo(MeterRegistry registry) {
        this.registry = registry;
        registerMetrics();
    }

    /**
     * Registers the subclass-specific meters; invoked from {@link #bindTo(MeterRegistry)}
     * after {@link #registry} has been set.
     */
    protected abstract void registerMetrics();
}
Original file line number Diff line number Diff line change
@@ -1,97 +1,85 @@
package eu.xenit.alfred.telemetry.solr.monitoring.binder;

import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.binder.MeterBinder;
import java.io.IOException;
import java.util.Map.Entry;
import org.alfresco.solr.AlfrescoCoreAdminHandler;
import org.alfresco.solr.SolrInformationServer;
import org.alfresco.solr.tracker.TrackerRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SolrCoreStatsMetrics implements MeterBinder {
import java.io.IOException;
import java.util.Map.Entry;

public class SolrCoreStatsMetrics extends AbstractSolrMetrics implements MeterBinder {

private static final String METER_ALFRESCO_NODES = "alfresco.nodes";
private static final String TAG_STATE = "state";
private static final String TAG_VALUE_INDEXED = "Indexed";

private final AlfrescoCoreAdminHandler coreAdminHandler;
private MeterRegistry registry;

private static final Logger logger = LoggerFactory.getLogger(SolrCoreStatsMetrics.class);

public SolrCoreStatsMetrics(AlfrescoCoreAdminHandler coreAdminHandler) {
this.coreAdminHandler = coreAdminHandler;
super(coreAdminHandler);
}

private void registerCoreStats() {
TrackerRegistry trackerRegistry = coreAdminHandler.getTrackerRegistry();

while (trackerRegistry.getCoreNames().size() == 0) {
logger.error("Solr did not start tracking yet, waiting 10sec");
try {
Thread.currentThread().sleep(10_000);
trackerRegistry = coreAdminHandler.getTrackerRegistry();
} catch (InterruptedException e) {
logger.error("Fail to wait 10 sec", e);
}
}
@Override
protected void registerMetrics() {
TrackerRegistry trackerRegistry = getTrackerRegistryWhenAvailable();

for (String coreName : trackerRegistry.getCoreNames()) {
SolrInformationServer server = (SolrInformationServer) coreAdminHandler.getInformationServers()
.get(coreName);

Tags tags = Tags.of("core", coreName, TAG_STATE, TAG_VALUE_INDEXED);
Gauge.builder(METER_ALFRESCO_NODES, server,
x -> getCoreStat(server, "Alfresco Nodes in Index"))
Gauge.builder(METER_ALFRESCO_NODES, coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco Nodes in Index"))
.tags(tags)
.register(registry);

tags = Tags.of("core", coreName, TAG_STATE, "Unindexed");
Gauge.builder(METER_ALFRESCO_NODES, server,
x -> getCoreStat(server, "Alfresco Unindexed Nodes"))
Gauge.builder(METER_ALFRESCO_NODES, coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco Unindexed Nodes"))
.tags(tags)
.register(registry);

tags = Tags.of("core", coreName, TAG_STATE, "Error");
Gauge.builder(METER_ALFRESCO_NODES, trackerRegistry,
x -> getCoreStat(server, "Alfresco Error Nodes in Index"))
Gauge.builder(METER_ALFRESCO_NODES, coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco Error Nodes in Index"))
.tags(tags)
.register(registry);

tags = Tags.of("core", coreName, TAG_STATE, TAG_VALUE_INDEXED);
Gauge.builder("alfresco.acls", trackerRegistry,
x -> getCoreStat(server, "Alfresco Acls in Index"))
Gauge.builder("alfresco.acls", coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco Acls in Index"))
.tags(tags)
.register(registry);

tags = Tags.of("core", coreName, TAG_STATE, "States");
Gauge.builder("alfresco.states", trackerRegistry,
x -> getCoreStat(server, "Alfresco States in Index"))
Gauge.builder("alfresco.states", coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco States in Index"))
.tags(tags)
.register(registry);

// technically these metrics are not per core, but in order to filter in grafana the core is added as a tag
tags = Tags.of("core", coreName, TAG_STATE, TAG_VALUE_INDEXED);
Gauge.builder("alfresco.transactions.nodes", trackerRegistry,
x -> getCoreStat(server, "Alfresco Transactions in Index"))
Gauge.builder("alfresco.transactions.nodes", coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco Transactions in Index"))
.tags(tags)
.register(registry);

tags = Tags.of("core", coreName, TAG_STATE, TAG_VALUE_INDEXED);
Gauge.builder("alfresco.transactions.acls", trackerRegistry,
x -> getCoreStat(server, "Alfresco Acl Transactions in Index"))
Gauge.builder("alfresco.transactions.acls", coreAdminHandler,
x -> getCoreStat(x, coreName, "Alfresco Acl Transactions in Index"))
.tags(tags)
.register(registry);

}
}


private long getCoreStat(SolrInformationServer server, String key) {
private static long getCoreStat(AlfrescoCoreAdminHandler coreAdminHandler, String coreName, String key) {
SolrInformationServer server = (SolrInformationServer) coreAdminHandler.getInformationServers()
.get(coreName);
try {
for (Entry<String, Object> entry : server.getCoreStats()) {
if (key.equals(entry.getKey())) {
Expand All @@ -104,9 +92,4 @@ private long getCoreStat(SolrInformationServer server, String key) {
return -1;
}

@Override
public void bindTo(MeterRegistry registry) {
this.registry = registry;
registerCoreStats();
}
}
Loading