
Commit 468f120

Optimise snapshot deletion to speed up snapshot deletion and creation (#15568) (#15724)
---------

Signed-off-by: Ashish Singh <[email protected]>
1 parent c91f2f1 commit 468f120

File tree

4 files changed: +87 −14 lines changed

CHANGELOG.md

+1
@@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Add canRemain method to TargetPoolAllocationDecider to move shards from local to remote pool for hot to warm tiering ([#15010](https://github.com/opensearch-project/OpenSearch/pull/15010))
 - Add support for pluggable deciders for concurrent search ([#15363](https://github.com/opensearch-project/OpenSearch/pull/15363))
 - Add support for comma-separated list of index names to be used with Snapshot Status API ([#15409](https://github.com/opensearch-project/OpenSearch/pull/15409))
+- Optimise snapshot deletion to speed up snapshot deletion and creation ([#15568](https://github.com/opensearch-project/OpenSearch/pull/15568))
 - [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218))
 - Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471))
 - Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454))

server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java

+78 −14
@@ -66,6 +66,7 @@
 import org.opensearch.common.Nullable;
 import org.opensearch.common.Numbers;
 import org.opensearch.common.Priority;
+import org.opensearch.common.Randomness;
 import org.opensearch.common.SetOnce;
 import org.opensearch.common.UUIDs;
 import org.opensearch.common.blobstore.BlobContainer;
@@ -1460,6 +1461,10 @@ private void asyncCleanupUnlinkedShardLevelBlobs(
         ActionListener<Void> listener
     ) {
         final List<Tuple<BlobPath, String>> filesToDelete = resolveFilesToDelete(oldRepositoryData, snapshotIds, deleteResults);
+        long startTimeNs = System.nanoTime();
+        Randomness.shuffle(filesToDelete);
+        logger.debug("[{}] shuffled the filesToDelete with timeElapsedNs={}", metadata.name(), (System.nanoTime() - startTimeNs));
+
         if (filesToDelete.isEmpty()) {
             listener.onResponse(null);
             return;
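Shuffling the flattened delete list spreads each batch across index and shard blob-path prefixes instead of deleting one directory's blobs back to back, which can help on object stores that throttle requests per key prefix. A minimal standalone sketch of the shuffle-then-batch idea (names here are hypothetical; the patch itself uses Randomness.shuffle and feeds batches into a BlockingQueue):

```java
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

// Hypothetical sketch: shuffle blob paths before slicing them into batches so
// consecutive delete batches do not all hit the same key prefix.
public class ShuffleThenBatch {
    static List<List<String>> toBatches(List<String> blobPaths, int batchSize) {
        List<String> shuffled = new ArrayList<>(blobPaths);
        Collections.shuffle(shuffled); // stand-in for org.opensearch.common.Randomness.shuffle
        List<List<String>> batches = new ArrayList<>();
        for (int from = 0; from < shuffled.size(); from += batchSize) {
            batches.add(shuffled.subList(from, Math.min(from + batchSize, shuffled.size())));
        }
        return batches;
    }
}
```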
@@ -1477,8 +1482,8 @@ private void asyncCleanupUnlinkedShardLevelBlobs(
             staleFilesToDeleteInBatch.size()
         );

-        // Start as many workers as fit into the snapshot pool at once at the most
-        final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(), staleFilesToDeleteInBatch.size());
+        // Start as many workers as fit into the snapshot_deletion pool at once at the most
+        final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size());
         for (int i = 0; i < workers; ++i) {
             executeStaleShardDelete(staleFilesToDeleteInBatch, remoteStoreLockManagerFactory, groupedListener);
         }
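Capping workers at the pool's maximum keeps a large delete from flooding the executor: each of the min(poolMax, batchCount) workers takes one batch, processes it, and resubmits itself until the queue drains, the same pattern executeStaleShardDelete and the new executeOldShardGensCleanup follow below. A minimal standalone sketch with plain java.util.concurrent types (all names hypothetical):

```java
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;

// Hypothetical sketch of the bounded-worker pattern: start at most
// min(poolMax, batches.size()) workers; each worker polls one batch,
// handles it, then chains to the next batch until the queue is empty.
public class BoundedWorkers {
    static void drain(BlockingQueue<List<String>> batches, ExecutorService pool, int poolMax) throws InterruptedException {
        final int workers = Math.min(poolMax, batches.size());
        for (int i = 0; i < workers; ++i) {
            pollAndDelete(batches, pool);
        }
    }

    static void pollAndDelete(BlockingQueue<List<String>> batches, ExecutorService pool) throws InterruptedException {
        final List<String> batch = batches.poll(0L, TimeUnit.MILLISECONDS);
        if (batch == null) {
            return; // queue drained, this worker exits
        }
        pool.execute(() -> {
            // ... delete the blobs named in `batch` ...
            try {
                pollAndDelete(batches, pool); // resubmit for the next batch, if any
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        });
    }
}
```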
@@ -1582,7 +1587,7 @@ private void executeStaleShardDelete(
         if (filesToDelete == null) {
             return;
         }
-        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(listener, l -> {
+        threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> {
             try {
                 // filtering files for which remote store lock release and cleanup succeeded,
                 // remaining files for which it failed will be retried in next snapshot delete run.
@@ -1646,7 +1651,7 @@ private void writeUpdatedShardMetaDataAndComputeDeletes(
         ActionListener<Collection<ShardSnapshotMetaDeleteResult>> onAllShardsCompleted
     ) {

-        final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
+        final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION);
         final List<IndexId> indices = oldRepositoryData.indicesToUpdateAfterRemovingSnapshot(snapshotIds);

         if (indices.isEmpty()) {
@@ -1836,7 +1841,7 @@ private void cleanupStaleBlobs(
             listener.onResponse(deleteResult);
         }, listener::onFailure), 2);

-        final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
+        final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION);
         final List<String> staleRootBlobs = staleRootBlobs(newRepoData, rootBlobs.keySet());
         if (staleRootBlobs.isEmpty()) {
             groupedListener.onResponse(DeleteResult.ZERO);
@@ -2049,7 +2054,7 @@ void cleanupStaleIndices(

         // Start as many workers as fit into the snapshot pool at once at the most
         final int workers = Math.min(
-            threadPool.info(ThreadPool.Names.SNAPSHOT).getMax(),
+            threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(),
             foundIndices.size() - survivingIndexIds.size()
         );
         for (int i = 0; i < workers; ++i) {
@@ -2107,7 +2112,7 @@ private void executeOneStaleIndexDelete(
             return;
         }
         final String indexSnId = indexEntry.getKey();
-        threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.supply(listener, () -> {
+        threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.supply(listener, () -> {
             try {
                 logger.debug("[{}] Found stale index [{}]. Cleaning it up", metadata.name(), indexSnId);
                 List<String> matchingShardPaths = findMatchingShardPaths(indexSnId, snapshotShardPaths);
@@ -2475,9 +2480,10 @@ public void finalizeSnapshot(
             repositoryUpdatePriority,
             ActionListener.wrap(newRepoData -> {
                 if (writeShardGens) {
-                    cleanupOldShardGens(existingRepositoryData, updatedRepositoryData);
+                    cleanupOldShardGens(existingRepositoryData, updatedRepositoryData, newRepoData, listener);
+                } else {
+                    listener.onResponse(newRepoData);
                 }
-                listener.onResponse(newRepoData);
             }, onUpdateFailure)
         );
     }, onUpdateFailure), 2 + indices.size());
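This restructures the completion path: previously the listener was answered right away and old shard generations were cleaned up as a fire-and-forget side effect; now the listener travels into cleanupOldShardGens and is answered once cleanup finishes (and, as the hunk below shows, cleanup failures still complete the listener successfully). A simplified control-flow sketch using a plain Consumer callback in place of ActionListener (all names hypothetical, not the real OpenSearch API):

```java
import java.util.function.Consumer;

// Simplified sketch of the finalizeSnapshot change: the cleanup step now owns
// the listener and responds only when it is done, instead of the caller
// responding first and cleaning up afterwards.
public class FinalizeFlow {
    static void finalizeSnapshot(boolean writeShardGens, Consumer<String> listener) {
        String newRepoData = "new-repository-data"; // placeholder for RepositoryData
        if (writeShardGens) {
            cleanupOldShardGens(newRepoData, listener); // cleanup now owns the listener
        } else {
            listener.accept(newRepoData);
        }
    }

    static void cleanupOldShardGens(String newRepoData, Consumer<String> listener) {
        // ... delete obsolete shard generation blobs in batches ...
        listener.accept(newRepoData); // respond only after cleanup completes (or fails)
    }
}
```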
@@ -2642,7 +2648,12 @@ private void logShardPathsOperationWarning(IndexId indexId, SnapshotId
     }

     // Delete all old shard gen blobs that aren't referenced any longer as a result from moving to updated repository data
-    private void cleanupOldShardGens(RepositoryData existingRepositoryData, RepositoryData updatedRepositoryData) {
+    private void cleanupOldShardGens(
+        RepositoryData existingRepositoryData,
+        RepositoryData updatedRepositoryData,
+        RepositoryData newRepositoryData,
+        ActionListener<RepositoryData> listener
+    ) {
         final List<String> toDelete = new ArrayList<>();
         updatedRepositoryData.shardGenerations()
             .obsoleteShardGenerations(existingRepositoryData.shardGenerations())
@@ -2651,10 +2662,62 @@ private void cleanupOldShardGens(RepositoryData existingRepositoryData, RepositoryData
                 (shardId, oldGen) -> toDelete.add(shardPath(indexId, shardId).buildAsString() + INDEX_FILE_PREFIX + oldGen)
             )
         );
+        if (toDelete.isEmpty()) {
+            listener.onResponse(newRepositoryData);
+            return;
+        }
         try {
-            deleteFromContainer(rootBlobContainer(), toDelete);
+            AtomicInteger counter = new AtomicInteger();
+            Collection<List<String>> subList = toDelete.stream()
+                .collect(Collectors.groupingBy(it -> counter.getAndIncrement() / maxShardBlobDeleteBatch))
+                .values();
+            final BlockingQueue<List<String>> staleFilesToDeleteInBatch = new LinkedBlockingQueue<>(subList);
+            logger.info(
+                "[{}] cleanupOldShardGens toDeleteSize={} groupSize={}",
+                metadata.name(),
+                toDelete.size(),
+                staleFilesToDeleteInBatch.size()
+            );
+            final GroupedActionListener<Void> groupedListener = new GroupedActionListener<>(ActionListener.wrap(r -> {
+                logger.info("[{}] completed cleanupOldShardGens", metadata.name());
+                listener.onResponse(newRepositoryData);
+            }, ex -> {
+                logger.error(new ParameterizedMessage("[{}] exception in cleanupOldShardGens", metadata.name()), ex);
+                listener.onResponse(newRepositoryData);
+            }), staleFilesToDeleteInBatch.size());
+
+            // Start as many workers as fit into the snapshot pool at once at the most
+            final int workers = Math.min(threadPool.info(ThreadPool.Names.SNAPSHOT_DELETION).getMax(), staleFilesToDeleteInBatch.size());
+            for (int i = 0; i < workers; ++i) {
+                executeOldShardGensCleanup(staleFilesToDeleteInBatch, groupedListener);
+            }
         } catch (Exception e) {
-            logger.warn("Failed to clean up old shard generation blobs", e);
+            logger.warn(new ParameterizedMessage(" [{}] Failed to clean up old shard generation blobs", metadata.name()), e);
+            listener.onResponse(newRepositoryData);
+        }
+    }
+
+    private void executeOldShardGensCleanup(BlockingQueue<List<String>> staleFilesToDeleteInBatch, GroupedActionListener<Void> listener)
+        throws InterruptedException {
+        List<String> filesToDelete = staleFilesToDeleteInBatch.poll(0L, TimeUnit.MILLISECONDS);
+        if (filesToDelete != null) {
+            threadPool.executor(ThreadPool.Names.SNAPSHOT_DELETION).execute(ActionRunnable.wrap(listener, l -> {
+                try {
+                    deleteFromContainer(rootBlobContainer(), filesToDelete);
+                    l.onResponse(null);
+                } catch (Exception e) {
+                    logger.warn(
+                        () -> new ParameterizedMessage(
+                            "[{}] Failed to delete following blobs during cleanupOldFiles : {}",
+                            metadata.name(),
+                            filesToDelete
+                        ),
+                        e
+                    );
+                    l.onFailure(e);
+                }
+                executeOldShardGensCleanup(staleFilesToDeleteInBatch, listener);
+            }));
         }
     }

@@ -2771,10 +2834,11 @@ public long getRemoteDownloadThrottleTimeInNanos() {
     }

     protected void assertSnapshotOrGenericThread() {
-        assert Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT + ']')
+        assert Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT_DELETION + ']')
+            || Thread.currentThread().getName().contains('[' + ThreadPool.Names.SNAPSHOT + ']')
             || Thread.currentThread().getName().contains('[' + ThreadPool.Names.GENERIC + ']') : "Expected current thread ["
             + Thread.currentThread()
-            + "] to be the snapshot or generic thread.";
+            + "] to be the snapshot_deletion or snapshot or generic thread.";
     }

     @Override
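The batching idiom introduced in cleanupOldShardGens above (an AtomicInteger counter plus Collectors.groupingBy with integer division) slices a flat list into fixed-size sublists. A runnable sketch of just that idiom, with a placeholder batch size standing in for maxShardBlobDeleteBatch:

```java
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

// Runnable sketch of the batching idiom from cleanupOldShardGens: a running
// counter divided by the batch size assigns consecutive elements to the same
// group, yielding fixed-size sublists.
public class GroupingSketch {
    public static void main(String[] args) {
        List<String> toDelete = IntStream.range(0, 10).mapToObj(i -> "blob-" + i).collect(Collectors.toList());
        int batchSize = 3; // placeholder for maxShardBlobDeleteBatch
        AtomicInteger counter = new AtomicInteger();
        Collection<List<String>> batches = toDelete.stream()
            .collect(Collectors.groupingBy(it -> counter.getAndIncrement() / batchSize))
            .values();
        batches.forEach(System.out::println); // e.g. [blob-0, blob-1, blob-2], [blob-3, blob-4, blob-5], ...
    }
}
```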

server/src/main/java/org/opensearch/threadpool/ThreadPool.java

+7
@@ -104,6 +104,7 @@ public static class Names {
         public static final String REFRESH = "refresh";
         public static final String WARMER = "warmer";
         public static final String SNAPSHOT = "snapshot";
+        public static final String SNAPSHOT_DELETION = "snapshot_deletion";
         public static final String FORCE_MERGE = "force_merge";
         public static final String FETCH_SHARD_STARTED = "fetch_shard_started";
         public static final String FETCH_SHARD_STORE = "fetch_shard_store";
@@ -175,6 +176,7 @@ public static ThreadPoolType fromType(String type) {
         map.put(Names.REFRESH, ThreadPoolType.SCALING);
         map.put(Names.WARMER, ThreadPoolType.SCALING);
         map.put(Names.SNAPSHOT, ThreadPoolType.SCALING);
+        map.put(Names.SNAPSHOT_DELETION, ThreadPoolType.SCALING);
         map.put(Names.FORCE_MERGE, ThreadPoolType.FIXED);
         map.put(Names.FETCH_SHARD_STARTED, ThreadPoolType.SCALING);
         map.put(Names.FETCH_SHARD_STORE, ThreadPoolType.SCALING);
@@ -233,6 +235,7 @@ public ThreadPool(
         final int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors);
         final int halfProcMaxAt10 = halfAllocatedProcessorsMaxTen(allocatedProcessors);
         final int genericThreadPoolMax = boundedBy(4 * allocatedProcessors, 128, 512);
+        final int snapshotDeletionPoolMax = boundedBy(4 * allocatedProcessors, 64, 256);
         builders.put(Names.GENERIC, new ScalingExecutorBuilder(Names.GENERIC, 4, genericThreadPoolMax, TimeValue.timeValueSeconds(30)));
         builders.put(Names.WRITE, new FixedExecutorBuilder(settings, Names.WRITE, allocatedProcessors, 10000));
         builders.put(Names.GET, new FixedExecutorBuilder(settings, Names.GET, allocatedProcessors, 1000));
@@ -262,6 +265,10 @@ public ThreadPool(
         builders.put(Names.REFRESH, new ScalingExecutorBuilder(Names.REFRESH, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)));
         builders.put(Names.WARMER, new ScalingExecutorBuilder(Names.WARMER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
         builders.put(Names.SNAPSHOT, new ScalingExecutorBuilder(Names.SNAPSHOT, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
+        builders.put(
+            Names.SNAPSHOT_DELETION,
+            new ScalingExecutorBuilder(Names.SNAPSHOT_DELETION, 1, snapshotDeletionPoolMax, TimeValue.timeValueMinutes(5))
+        );
         builders.put(
             Names.FETCH_SHARD_STARTED,
             new ScalingExecutorBuilder(Names.FETCH_SHARD_STARTED, 1, 2 * allocatedProcessors, TimeValue.timeValueMinutes(5))
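The new pool scales from 1 thread up to boundedBy(4 * allocatedProcessors, 64, 256), a far higher ceiling than the snapshot pool's half-of-processors-max-five. A small sketch of how that bound works out, assuming boundedBy clamps its value into [min, max] as its usage above suggests:

```java
// Sketch: compute the snapshot_deletion pool ceiling for a few processor
// counts, assuming boundedBy(value, min, max) clamps value into [min, max].
public class PoolSizing {
    static int boundedBy(int value, int min, int max) {
        return Math.min(max, Math.max(min, value));
    }

    public static void main(String[] args) {
        for (int procs : new int[] { 4, 16, 32, 64, 128 }) {
            System.out.println(procs + " processors -> up to " + boundedBy(4 * procs, 64, 256) + " threads");
        }
        // prints: 4 -> 64, 16 -> 64, 32 -> 128, 64 -> 256, 128 -> 256
    }
}
```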

server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java

+1
@@ -148,6 +148,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcessors
         sizes.put(ThreadPool.Names.REFRESH, ThreadPool::halfAllocatedProcessorsMaxTen);
         sizes.put(ThreadPool.Names.WARMER, ThreadPool::halfAllocatedProcessorsMaxFive);
         sizes.put(ThreadPool.Names.SNAPSHOT, ThreadPool::halfAllocatedProcessorsMaxFive);
+        sizes.put(ThreadPool.Names.SNAPSHOT_DELETION, n -> ThreadPool.boundedBy(4 * n, 64, 256));
         sizes.put(ThreadPool.Names.FETCH_SHARD_STARTED, ThreadPool::twiceAllocatedProcessors);
         sizes.put(ThreadPool.Names.FETCH_SHARD_STORE, ThreadPool::twiceAllocatedProcessors);
         sizes.put(ThreadPool.Names.TRANSLOG_TRANSFER, ThreadPool::halfAllocatedProcessors);
