-
Notifications
You must be signed in to change notification settings - Fork 25.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DISCOVERY: Cleanup AbstractDisruptionTestCase #34808
Changes from 10 commits
6300be8
4101a4f
3d17825
3a558be
a0b1886
1974013
cd1ed87
c7069d7
d7be238
471ae32
2146d2a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,23 +19,19 @@ | |
|
||
package org.elasticsearch.discovery; | ||
|
||
import java.nio.file.Path; | ||
import org.elasticsearch.cluster.ClusterState; | ||
import org.elasticsearch.cluster.block.ClusterBlock; | ||
import org.elasticsearch.cluster.block.ClusterBlockLevel; | ||
import org.elasticsearch.cluster.node.DiscoveryNodes; | ||
import org.elasticsearch.common.Nullable; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.unit.TimeValue; | ||
import org.elasticsearch.discovery.zen.ElectMasterService; | ||
import org.elasticsearch.discovery.zen.FaultDetection; | ||
import org.elasticsearch.discovery.zen.UnicastZenPing; | ||
import org.elasticsearch.discovery.zen.ZenPing; | ||
import org.elasticsearch.env.NodeEnvironment; | ||
import org.elasticsearch.plugins.Plugin; | ||
import org.elasticsearch.test.ESIntegTestCase; | ||
import org.elasticsearch.test.InternalTestCluster; | ||
import org.elasticsearch.test.NodeConfigurationSource; | ||
import org.elasticsearch.test.discovery.TestZenDiscovery; | ||
import org.elasticsearch.test.disruption.NetworkDisruption; | ||
import org.elasticsearch.test.disruption.NetworkDisruption.Bridge; | ||
|
@@ -56,27 +52,30 @@ | |
import java.util.Set; | ||
import java.util.concurrent.TimeUnit; | ||
|
||
import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; | ||
import static org.hamcrest.Matchers.equalTo; | ||
import static org.hamcrest.Matchers.not; | ||
|
||
public abstract class AbstractDisruptionTestCase extends ESIntegTestCase { | ||
|
||
static final TimeValue DISRUPTION_HEALING_OVERHEAD = TimeValue.timeValueSeconds(40); // we use 30s as timeout in many places. | ||
|
||
private NodeConfigurationSource discoveryConfig; | ||
static final Settings DEFAULT_SETTINGS = Settings.builder() | ||
.put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly | ||
.put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly | ||
.put("discovery.zen.join_timeout", "10s") // still long enough to induce failures, but not so long that the test times out | ||
.put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly | ||
.put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this | ||
// value and the time of disruption and does not recover immediately | ||
// when disruption is stopped. We should make sure we recover faster | ||
// than the default of 30s, which would cause ensureGreen and friends to time out | ||
.build(); | ||
|
||
@Override | ||
protected Settings nodeSettings(int nodeOrdinal) { | ||
return Settings.builder().put(discoveryConfig.nodeSettings(nodeOrdinal)) | ||
return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(DEFAULT_SETTINGS) | ||
.put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build(); | ||
} | ||
|
||
@Before | ||
public void clearConfig() { | ||
discoveryConfig = null; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎉 |
||
} | ||
|
||
@Override | ||
protected int numberOfShards() { | ||
return 3; | ||
|
@@ -119,11 +118,6 @@ protected void beforeIndexDeletion() throws Exception { | |
} | ||
|
||
List<String> startCluster(int numberOfNodes) { | ||
return startCluster(numberOfNodes, -1); | ||
} | ||
|
||
List<String> startCluster(int numberOfNodes, int minimumMasterNode) { | ||
configureCluster(numberOfNodes, minimumMasterNode); | ||
InternalTestCluster internalCluster = internalCluster(); | ||
List<String> nodes = internalCluster.startNodes(numberOfNodes); | ||
ensureStableCluster(numberOfNodes); | ||
|
@@ -136,54 +130,11 @@ List<String> startCluster(int numberOfNodes, int minimumMasterNode) { | |
return nodes; | ||
} | ||
|
||
static final Settings DEFAULT_SETTINGS = Settings.builder() | ||
.put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly | ||
.put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly | ||
.put("discovery.zen.join_timeout", "10s") // still long enough to induce failures, but not so long that the test times out | ||
.put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly | ||
.put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this | ||
// value and the time of disruption and does not recover immediately | ||
// when disruption is stopped. We should make sure we recover faster | ||
// than the default of 30s, which would cause ensureGreen and friends to time out | ||
.build(); | ||
|
||
@Override | ||
protected Collection<Class<? extends Plugin>> nodePlugins() { | ||
return Arrays.asList(MockTransportService.TestPlugin.class); | ||
} | ||
|
||
void configureCluster(int numberOfNodes, int minimumMasterNode) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🎉 🎉 |
||
configureCluster(DEFAULT_SETTINGS, numberOfNodes, minimumMasterNode); | ||
} | ||
|
||
void configureCluster(Settings settings, int numberOfNodes, int minimumMasterNode) { | ||
if (minimumMasterNode < 0) { | ||
minimumMasterNode = numberOfNodes / 2 + 1; | ||
} | ||
logger.info("---> configured unicast"); | ||
// TODO: Rarely use default settings for some of these | ||
Settings nodeSettings = Settings.builder() | ||
.put(settings) | ||
.put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), numberOfNodes) | ||
.put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), minimumMasterNode) | ||
.putList(DISCOVERY_HOSTS_PROVIDER_SETTING.getKey(), "file") | ||
.build(); | ||
|
||
if (discoveryConfig == null) { | ||
discoveryConfig = new NodeConfigurationSource() { | ||
@Override | ||
public Settings nodeSettings(final int nodeOrdinal) { | ||
return nodeSettings; | ||
} | ||
|
||
@Override | ||
public Path nodeConfigPath(final int nodeOrdinal) { | ||
return null; | ||
} | ||
}; | ||
} | ||
} | ||
|
||
ClusterState getNodeClusterState(String node) { | ||
return client(node).admin().cluster().prepareState().setLocal(true).get().getState(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.discovery; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import org.elasticsearch.action.index.IndexRequestBuilder; | ||
import org.elasticsearch.cluster.ClusterState; | ||
import org.elasticsearch.cluster.metadata.IndexMetaData; | ||
import org.elasticsearch.common.settings.Settings; | ||
import org.elasticsearch.common.xcontent.XContentType; | ||
import org.elasticsearch.indices.store.IndicesStoreIntegrationIT; | ||
import org.elasticsearch.plugins.Plugin; | ||
import org.elasticsearch.test.ESIntegTestCase; | ||
import org.elasticsearch.test.InternalTestCluster; | ||
import org.elasticsearch.test.disruption.NetworkDisruption; | ||
import org.elasticsearch.test.transport.MockTransportService; | ||
|
||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; | ||
import static org.hamcrest.Matchers.equalTo; | ||
|
||
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) | ||
public class ClusterDisruptionCleanSettingsIT extends ESIntegTestCase { | ||
|
||
@Override | ||
protected Collection<Class<? extends Plugin>> nodePlugins() { | ||
return Arrays.asList(MockTransportService.TestPlugin.class); | ||
} | ||
|
||
/** | ||
* This test creates a scenario where a primary shard (0 replicas) relocates and is in POST_RECOVERY on the target | ||
* node but already deleted on the source node. Search request should still work. | ||
*/ | ||
public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Exception { | ||
internalCluster().startMasterOnlyNode(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the comment about why this couldn't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added it back :) |
||
final String node_1 = internalCluster().startDataOnlyNode(); | ||
|
||
logger.info("--> creating index [test] with one shard and on replica"); | ||
assertAcked(prepareCreate("test").setSettings( | ||
Settings.builder().put(indexSettings()) | ||
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) | ||
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)) | ||
); | ||
ensureGreen("test"); | ||
|
||
final String node_2 = internalCluster().startDataOnlyNode(); | ||
List<IndexRequestBuilder> indexRequestBuilderList = new ArrayList<>(); | ||
for (int i = 0; i < 100; i++) { | ||
indexRequestBuilderList.add(client().prepareIndex().setIndex("test").setType("_doc") | ||
.setSource("{\"int_field\":1}", XContentType.JSON)); | ||
} | ||
indexRandom(true, indexRequestBuilderList); | ||
|
||
IndicesStoreIntegrationIT.relocateAndBlockCompletion(logger, "test", 0, node_1, node_2); | ||
// now search for the documents and see if we get a reply | ||
assertThat(client().prepareSearch().setSize(0).get().getHits().getTotalHits(), equalTo(100L)); | ||
} | ||
|
||
/** | ||
* Tests that indices are properly deleted even if there is a master transition in between. | ||
* Test for https://github.com/elastic/elasticsearch/issues/11665 | ||
*/ | ||
public void testIndicesDeleted() throws Exception { | ||
final Settings settings = Settings.builder() | ||
.put(AbstractDisruptionTestCase.DEFAULT_SETTINGS) | ||
.put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait on isolated data node | ||
.put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed | ||
original-brownbear marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.build(); | ||
final String idxName = "test"; | ||
final List<String> allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2, settings); | ||
final String dataNode = internalCluster().startDataOnlyNode(settings); | ||
ensureStableCluster(3); | ||
assertAcked(prepareCreate("test")); | ||
|
||
final String masterNode1 = internalCluster().getMasterName(); | ||
NetworkDisruption networkDisruption = | ||
new NetworkDisruption(new NetworkDisruption.TwoPartitions(masterNode1, dataNode), new NetworkDisruption.NetworkUnresponsive()); | ||
internalCluster().setDisruptionScheme(networkDisruption); | ||
networkDisruption.startDisrupting(); | ||
// We know this will time out due to the partition, we check manually below to not proceed until | ||
// the delete has been applied to the master node and the master eligible node. | ||
internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get(); | ||
// Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node. | ||
assertBusy(() -> { | ||
for (String masterNode : allMasterEligibleNodes) { | ||
final ClusterState masterState = internalCluster().clusterService(masterNode).state(); | ||
assertTrue("index not deleted on " + masterNode, masterState.metaData().hasIndex(idxName) == false); | ||
} | ||
}); | ||
internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK); | ||
ensureYellow(); | ||
assertFalse(client().admin().indices().prepareExists(idxName).get().isExists()); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: could this move back down to where it was before, since it's not being changed? Would make the diff smaller.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done :)