From 6300be86fb14149ca71846d6c1275015dc067129 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 24 Oct 2018 17:04:59 +0200 Subject: [PATCH 1/5] DISCOVERY: Cleanup AbstractDisruptionTestCase * Make the internal test cluster manage minimum master nodes where we used the default of (nodes / 2 + 1) before * Manually set master nodes in the cases where we didn't use the default * Remove use of the `NodeConfigurationSource` indirection * Relates #33675 --- .../discovery/AbstractDisruptionTestCase.java | 44 +++++-------------- .../discovery/ClusterDisruptionIT.java | 13 +++--- .../discovery/DiscoveryDisruptionIT.java | 14 +++--- .../discovery/MasterDisruptionIT.java | 8 ++-- .../discovery/SnapshotDisruptionIT.java | 5 +-- 5 files changed, 27 insertions(+), 57 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index 0bb72a4050de8..232a4c40d337d 100644 --- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -19,7 +19,6 @@ package org.elasticsearch.discovery; -import java.nio.file.Path; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.block.ClusterBlock; import org.elasticsearch.cluster.block.ClusterBlockLevel; @@ -27,7 +26,6 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.discovery.zen.FaultDetection; import org.elasticsearch.discovery.zen.UnicastZenPing; import org.elasticsearch.discovery.zen.ZenPing; @@ -35,7 +33,6 @@ import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; -import 
org.elasticsearch.test.NodeConfigurationSource; import org.elasticsearch.test.discovery.TestZenDiscovery; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.disruption.NetworkDisruption.Bridge; @@ -60,21 +57,22 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public abstract class AbstractDisruptionTestCase extends ESIntegTestCase { static final TimeValue DISRUPTION_HEALING_OVERHEAD = TimeValue.timeValueSeconds(40); // we use 30s as timeout in many places. - private NodeConfigurationSource discoveryConfig; + private Settings currentSettings; @Override protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder().put(discoveryConfig.nodeSettings(nodeOrdinal)) + return Settings.builder().put(currentSettings) .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build(); } @Before public void clearConfig() { - discoveryConfig = null; + currentSettings = null; } @Override @@ -119,11 +117,7 @@ protected void beforeIndexDeletion() throws Exception { } List startCluster(int numberOfNodes) { - return startCluster(numberOfNodes, -1); - } - - List startCluster(int numberOfNodes, int minimumMasterNode) { - configureCluster(numberOfNodes, minimumMasterNode); + configureCluster(numberOfNodes); InternalTestCluster internalCluster = internalCluster(); List nodes = internalCluster.startNodes(numberOfNodes); ensureStableCluster(numberOfNodes); @@ -152,35 +146,19 @@ protected Collection> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class); } - void configureCluster(int numberOfNodes, int minimumMasterNode) { - configureCluster(DEFAULT_SETTINGS, numberOfNodes, minimumMasterNode); + void configureCluster(int numberOfNodes) { + configureCluster(DEFAULT_SETTINGS, numberOfNodes); } - void configureCluster(Settings settings, int numberOfNodes, int 
minimumMasterNode) { - if (minimumMasterNode < 0) { - minimumMasterNode = numberOfNodes / 2 + 1; - } + void configureCluster(Settings settings, int numberOfNodes) { logger.info("---> configured unicast"); - // TODO: Rarely use default settings form some of these - Settings nodeSettings = Settings.builder() + if (currentSettings == null) { + // TODO: Rarely use default settings form some of these + currentSettings = Settings.builder() .put(settings) .put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), numberOfNodes) - .put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), minimumMasterNode) .putList(DISCOVERY_HOSTS_PROVIDER_SETTING.getKey(), "file") .build(); - - if (discoveryConfig == null) { - discoveryConfig = new NodeConfigurationSource() { - @Override - public Settings nodeSettings(final int nodeOrdinal) { - return nodeSettings; - } - - @Override - public Path nodeConfigPath(final int nodeOrdinal) { - return null; - } - }; } } diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java index b35bf8444e95e..3b85080ff31d2 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java @@ -37,8 +37,6 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.indices.store.IndicesStoreIntegrationIT; -import org.elasticsearch.test.ESIntegTestCase.ClusterScope; -import org.elasticsearch.test.ESIntegTestCase.Scope; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.disruption.NetworkDisruption.Bridge; @@ -72,7 +70,6 @@ /** * Tests various cluster operations (e.g., indexing) during disruptions. 
*/ -@ClusterScope(scope = Scope.TEST, numDataNodes = 0, transportClientRatio = 0, autoMinMasterNodes = false) @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE") public class ClusterDisruptionIT extends AbstractDisruptionTestCase { @@ -289,7 +286,7 @@ public void testRejoinDocumentExistsInAllShardCopies() throws Exception { // simulate handling of sending shard failure during an isolation public void testSendingShardFailure() throws Exception { - List nodes = startCluster(3, 2); + List nodes = startCluster(3); String masterNode = internalCluster().getMasterName(); List nonMasterNodes = nodes.stream().filter(node -> !node.equals(masterNode)).collect(Collectors.toList()); String nonMasterNode = randomFrom(nonMasterNodes); @@ -363,7 +360,7 @@ public void onFailure(Exception e) { */ public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Exception { // don't use DEFAULT settings (which can cause node disconnects on a slow CI machine) - configureCluster(Settings.EMPTY, 3, 1); + configureCluster(Settings.EMPTY, 3); internalCluster().startMasterOnlyNode(); final String node_1 = internalCluster().startDataOnlyNode(); @@ -390,10 +387,10 @@ public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Excep public void testIndexImportedFromDataOnlyNodesIfMasterLostDataFolder() throws Exception { // test for https://github.com/elastic/elasticsearch/issues/8823 - configureCluster(2, 1); + configureCluster(2); String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY); internalCluster().startDataOnlyNode(Settings.EMPTY); - + setMinimumMasterNodes(1); ensureStableCluster(2); assertAcked(prepareCreate("index").setSettings(Settings.builder().put("index.number_of_replicas", 0))); index("index", "_doc", "1", jsonBuilder().startObject().field("text", "some text").endObject()); @@ -421,7 +418,7 @@ public void testIndicesDeleted() throws Exception { .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till 
cluster state is committed .build(); final String idxName = "test"; - configureCluster(settings, 3, 2); + configureCluster(settings, 3); final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2); final String dataNode = internalCluster().startDataOnlyNode(); ensureStableCluster(3); diff --git a/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java index 2c7f17468ac3a..34483566b0b77 100644 --- a/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java @@ -28,7 +28,6 @@ import org.elasticsearch.discovery.zen.PublishClusterStateAction; import org.elasticsearch.discovery.zen.UnicastZenPing; import org.elasticsearch.discovery.zen.ZenPing; -import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.discovery.TestZenDiscovery; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect; @@ -54,13 +53,12 @@ /** * Tests for discovery during disruptions. 
*/ -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0, autoMinMasterNodes = false) @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE") public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase { public void testIsolatedUnicastNodes() throws Exception { internalCluster().setHostsListContainsOnlyFirstNode(true); - List nodes = startCluster(4, -1); + List nodes = startCluster(4); // Figure out what is the elected master node final String unicastTarget = nodes.get(0); @@ -100,7 +98,7 @@ public void testIsolatedUnicastNodes() throws Exception { */ public void testUnicastSinglePingResponseContainsMaster() throws Exception { internalCluster().setHostsListContainsOnlyFirstNode(true); - List nodes = startCluster(4, -1); + List nodes = startCluster(4); // Figure out what is the elected master node final String masterNode = internalCluster().getMasterName(); logger.info("---> legit elected master node={}", masterNode); @@ -138,8 +136,8 @@ public void testUnicastSinglePingResponseContainsMaster() throws Exception { * Test cluster join with issues in cluster state publishing * */ public void testClusterJoinDespiteOfPublishingIssues() throws Exception { - List nodes = startCluster(2, 1); - + List nodes = startCluster(2); + setMinimumMasterNodes(1); String masterNode = internalCluster().getMasterName(); String nonMasterNode; if (masterNode.equals(nodes.get(0))) { @@ -196,7 +194,7 @@ public void testClusterJoinDespiteOfPublishingIssues() throws Exception { } public void testClusterFormingWithASlowNode() throws Exception { - configureCluster(3, 2); + configureCluster(3); SlowClusterStateProcessing disruption = new SlowClusterStateProcessing(random(), 0, 0, 1000, 2000); @@ -212,7 +210,7 @@ public void testClusterFormingWithASlowNode() throws Exception { } public void testElectMasterWithLatestVersion() throws Exception { - configureCluster(3, 2); + configureCluster(3); final Set nodes = new 
HashSet<>(internalCluster().startNodes(3)); ensureStableCluster(3); ServiceDisruptionScheme isolateAllNodes = diff --git a/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java index f7716c6f146ff..3bf39de9ac425 100644 --- a/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java @@ -38,7 +38,6 @@ import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.discovery.zen.ZenDiscovery; import org.elasticsearch.monitor.jvm.HotThreads; -import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.disruption.BlockMasterServiceOnMaster; import org.elasticsearch.test.disruption.IntermittentLongGCDisruption; import org.elasticsearch.test.disruption.LongGCDisruption; @@ -67,7 +66,6 @@ /** * Tests relating to the loss of the master. */ -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0, autoMinMasterNodes = false) @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE") public class MasterDisruptionIT extends AbstractDisruptionTestCase { @@ -153,8 +151,8 @@ public void testNodesFDAfterMasterReelection() throws Exception { */ @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE,org.elasticsearch.test.disruption:TRACE") public void testStaleMasterNotHijackingMajority() throws Exception { - // 3 node cluster with unicast discovery and minimum_master_nodes set to 2: - final List nodes = startCluster(3, 2); + // 3 node cluster with unicast discovery and minimum_master_nodes set to the default of 2: + final List nodes = startCluster(3); // Save the current master node as old master node, because that node will get frozen final String oldMasterNode = internalCluster().getMasterName(); @@ -267,7 +265,7 @@ public void onFailure(String source, Exception e) { * Test that cluster 
recovers from a long GC on master that causes other nodes to elect a new one */ public void testMasterNodeGCs() throws Exception { - List nodes = startCluster(3, -1); + List nodes = startCluster(3); String oldMasterNode = internalCluster().getMasterName(); // a very long GC, but it's OK as we remove the disruption when it has had an effect diff --git a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java index 4c9edf6e17eb1..0ca27eb965920 100644 --- a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java @@ -31,7 +31,6 @@ import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.snapshots.SnapshotMissingException; import org.elasticsearch.snapshots.SnapshotState; -import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.junit.annotations.TestLogging; @@ -49,7 +48,6 @@ /** * Tests snapshot operations during disruptions. 
*/ -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0, autoMinMasterNodes = false) @TestLogging("org.elasticsearch.snapshot:TRACE") public class SnapshotDisruptionIT extends AbstractDisruptionTestCase { @@ -59,9 +57,10 @@ public void testDisruptionOnSnapshotInitialization() throws Exception { .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed .build(); final String idxName = "test"; - configureCluster(settings, 4, 2); + configureCluster(settings, 4); final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(3); final String dataNode = internalCluster().startDataOnlyNode(); + setMinimumMasterNodes(2); ensureStableCluster(4); createRandomIndex(idxName); From 1974013dceddc846c46a944d358509b95bfa0f4e Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 30 Oct 2018 11:46:59 +0100 Subject: [PATCH 2/5] CR: further simplify settings --- .../discovery/AbstractDisruptionTestCase.java | 48 ++++++++----------- .../discovery/ClusterDisruptionIT.java | 8 ++-- .../discovery/DiscoveryDisruptionIT.java | 15 ++---- .../discovery/MasterDisruptionIT.java | 2 + .../discovery/SnapshotDisruptionIT.java | 5 +- 5 files changed, 32 insertions(+), 46 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index 232a4c40d337d..1b5827d0c4c15 100644 --- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -29,7 +29,6 @@ import org.elasticsearch.discovery.zen.FaultDetection; import org.elasticsearch.discovery.zen.UnicastZenPing; import org.elasticsearch.discovery.zen.ZenPing; -import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; import 
org.elasticsearch.test.InternalTestCluster; @@ -53,20 +52,32 @@ import java.util.Set; import java.util.concurrent.TimeUnit; -import static org.elasticsearch.discovery.DiscoveryModule.DISCOVERY_HOSTS_PROVIDER_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; -@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public abstract class AbstractDisruptionTestCase extends ESIntegTestCase { static final TimeValue DISRUPTION_HEALING_OVERHEAD = TimeValue.timeValueSeconds(40); // we use 30s as timeout in many places. + static final Settings DEFAULT_SETTINGS = Settings.builder() + .put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly + .put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly + .put("discovery.zen.join_timeout", "10s") // still long to induce failures but to long so test won't time out + .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly + .put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this + // value and the time of disruption and does not recover immediately + // when disruption is stop. 
We should make sure we recover faster + // then the default of 30s, causing ensureGreen and friends to time out + .build(); + private Settings currentSettings; @Override protected Settings nodeSettings(int nodeOrdinal) { - return Settings.builder().put(currentSettings) + if (currentSettings == null) { + currentSettings = DEFAULT_SETTINGS; + } + return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(currentSettings) .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build(); } @@ -117,7 +128,6 @@ protected void beforeIndexDeletion() throws Exception { } List startCluster(int numberOfNodes) { - configureCluster(numberOfNodes); InternalTestCluster internalCluster = internalCluster(); List nodes = internalCluster.startNodes(numberOfNodes); ensureStableCluster(numberOfNodes); @@ -130,36 +140,16 @@ List startCluster(int numberOfNodes) { return nodes; } - static final Settings DEFAULT_SETTINGS = Settings.builder() - .put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly - .put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly - .put("discovery.zen.join_timeout", "10s") // still long to induce failures but to long so test won't time out - .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly - .put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this - // value and the time of disruption and does not recover immediately - // when disruption is stop. 
We should make sure we recover faster - // then the default of 30s, causing ensureGreen and friends to time out - .build(); - @Override protected Collection> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class); } - void configureCluster(int numberOfNodes) { - configureCluster(DEFAULT_SETTINGS, numberOfNodes); - } - - void configureCluster(Settings settings, int numberOfNodes) { + void configureCluster(Settings settings) { logger.info("---> configured unicast"); - if (currentSettings == null) { - // TODO: Rarely use default settings form some of these - currentSettings = Settings.builder() - .put(settings) - .put(NodeEnvironment.MAX_LOCAL_STORAGE_NODES_SETTING.getKey(), numberOfNodes) - .putList(DISCOVERY_HOSTS_PROVIDER_SETTING.getKey(), "file") - .build(); - } + assert currentSettings == null; + // TODO: Rarely use default settings form some of these + currentSettings = Settings.builder().put(settings).build(); } ClusterState getNodeClusterState(String node) { diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java index 3b85080ff31d2..0f3337aa74b1e 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java @@ -37,6 +37,7 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.indices.store.IndicesStoreIntegrationIT; +import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.disruption.NetworkDisruption.Bridge; @@ -71,6 +72,7 @@ * Tests various cluster operations (e.g., indexing) during disruptions. 
*/ @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE") +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public class ClusterDisruptionIT extends AbstractDisruptionTestCase { /** @@ -360,7 +362,7 @@ public void onFailure(Exception e) { */ public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Exception { // don't use DEFAULT settings (which can cause node disconnects on a slow CI machine) - configureCluster(Settings.EMPTY, 3); + configureCluster(Settings.EMPTY); internalCluster().startMasterOnlyNode(); final String node_1 = internalCluster().startDataOnlyNode(); @@ -387,10 +389,8 @@ public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Excep public void testIndexImportedFromDataOnlyNodesIfMasterLostDataFolder() throws Exception { // test for https://github.com/elastic/elasticsearch/issues/8823 - configureCluster(2); String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY); internalCluster().startDataOnlyNode(Settings.EMPTY); - setMinimumMasterNodes(1); ensureStableCluster(2); assertAcked(prepareCreate("index").setSettings(Settings.builder().put("index.number_of_replicas", 0))); index("index", "_doc", "1", jsonBuilder().startObject().field("text", "some text").endObject()); @@ -418,7 +418,7 @@ public void testIndicesDeleted() throws Exception { .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed .build(); final String idxName = "test"; - configureCluster(settings, 3); + configureCluster(settings); final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2); final String dataNode = internalCluster().startDataOnlyNode(); ensureStableCluster(3); diff --git a/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java index 34483566b0b77..610965b5a519a 100644 --- 
a/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/DiscoveryDisruptionIT.java @@ -28,6 +28,7 @@ import org.elasticsearch.discovery.zen.PublishClusterStateAction; import org.elasticsearch.discovery.zen.UnicastZenPing; import org.elasticsearch.discovery.zen.ZenPing; +import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.discovery.TestZenDiscovery; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.disruption.NetworkDisruption.NetworkDisconnect; @@ -54,6 +55,7 @@ * Tests for discovery during disruptions. */ @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE") +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public class DiscoveryDisruptionIT extends AbstractDisruptionTestCase { public void testIsolatedUnicastNodes() throws Exception { @@ -136,15 +138,8 @@ public void testUnicastSinglePingResponseContainsMaster() throws Exception { * Test cluster join with issues in cluster state publishing * */ public void testClusterJoinDespiteOfPublishingIssues() throws Exception { - List nodes = startCluster(2); - setMinimumMasterNodes(1); - String masterNode = internalCluster().getMasterName(); - String nonMasterNode; - if (masterNode.equals(nodes.get(0))) { - nonMasterNode = nodes.get(1); - } else { - nonMasterNode = nodes.get(0); - } + String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY); + String nonMasterNode = internalCluster().startDataOnlyNode(Settings.EMPTY); DiscoveryNodes discoveryNodes = internalCluster().getInstance(ClusterService.class, nonMasterNode).state().nodes(); @@ -194,7 +189,6 @@ public void testClusterJoinDespiteOfPublishingIssues() throws Exception { } public void testClusterFormingWithASlowNode() throws Exception { - configureCluster(3); SlowClusterStateProcessing disruption = new SlowClusterStateProcessing(random(), 0, 
0, 1000, 2000); @@ -210,7 +204,6 @@ public void testClusterFormingWithASlowNode() throws Exception { } public void testElectMasterWithLatestVersion() throws Exception { - configureCluster(3); final Set nodes = new HashSet<>(internalCluster().startNodes(3)); ensureStableCluster(3); ServiceDisruptionScheme isolateAllNodes = diff --git a/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java index 3bf39de9ac425..9050f95698fb4 100644 --- a/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/MasterDisruptionIT.java @@ -38,6 +38,7 @@ import org.elasticsearch.discovery.zen.ElectMasterService; import org.elasticsearch.discovery.zen.ZenDiscovery; import org.elasticsearch.monitor.jvm.HotThreads; +import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.disruption.BlockMasterServiceOnMaster; import org.elasticsearch.test.disruption.IntermittentLongGCDisruption; import org.elasticsearch.test.disruption.LongGCDisruption; @@ -67,6 +68,7 @@ * Tests relating to the loss of the master. 
*/ @TestLogging("_root:DEBUG,org.elasticsearch.cluster.service:TRACE") +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public class MasterDisruptionIT extends AbstractDisruptionTestCase { /** diff --git a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java index 0ca27eb965920..91df112cc0f7c 100644 --- a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java @@ -31,6 +31,7 @@ import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.snapshots.SnapshotMissingException; import org.elasticsearch.snapshots.SnapshotState; +import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.junit.annotations.TestLogging; @@ -49,6 +50,7 @@ * Tests snapshot operations during disruptions. 
*/ @TestLogging("org.elasticsearch.snapshot:TRACE") +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public class SnapshotDisruptionIT extends AbstractDisruptionTestCase { public void testDisruptionOnSnapshotInitialization() throws Exception { @@ -57,10 +59,9 @@ public void testDisruptionOnSnapshotInitialization() throws Exception { .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed .build(); final String idxName = "test"; - configureCluster(settings, 4); + configureCluster(settings); final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(3); final String dataNode = internalCluster().startDataOnlyNode(); - setMinimumMasterNodes(2); ensureStableCluster(4); createRandomIndex(idxName); From cd1ed8711f141158c5997d3626d1940329f9c7d4 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 30 Oct 2018 12:30:26 +0100 Subject: [PATCH 3/5] CR: Simplify settings setup --- .../discovery/SnapshotDisruptionIT.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java index 91df112cc0f7c..7d4fcff2f47d3 100644 --- a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java @@ -40,7 +40,6 @@ import java.util.List; import java.util.Set; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -53,13 +52,15 @@ @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) public class SnapshotDisruptionIT extends AbstractDisruptionTestCase { - public void 
testDisruptionOnSnapshotInitialization() throws Exception { - final Settings settings = Settings.builder() - .put(DEFAULT_SETTINGS) - .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder().put(super.nodeSettings(nodeOrdinal)) + .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") .build(); + } + + public void testDisruptionOnSnapshotInitialization() throws Exception { final String idxName = "test"; - configureCluster(settings); final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(3); final String dataNode = internalCluster().startDataOnlyNode(); ensureStableCluster(4); @@ -159,7 +160,7 @@ public void clusterChanged(ClusterChangedEvent event) { } } - private void createRandomIndex(String idxName) throws ExecutionException, InterruptedException { + private void createRandomIndex(String idxName) throws InterruptedException { assertAcked(prepareCreate(idxName, 0, Settings.builder().put("number_of_shards", between(1, 20)) .put("number_of_replicas", 0))); logger.info("--> indexing some data"); From 471ae320eb24bc7ac65f3e7203106ea6f7c0c1ab Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 30 Oct 2018 16:55:15 +0100 Subject: [PATCH 4/5] CR: No more mutable settings field in disruption tests --- .../discovery/AbstractDisruptionTestCase.java | 19 +-- .../ClusterDisruptionCleanSettingsIT.java | 113 ++++++++++++++++++ .../discovery/ClusterDisruptionIT.java | 72 ----------- .../discovery/SnapshotDisruptionIT.java | 14 ++- 4 files changed, 127 insertions(+), 91 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index 1b5827d0c4c15..063ab1ffffef4 100644 --- 
a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -70,22 +70,12 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase { // then the default of 30s, causing ensureGreen and friends to time out .build(); - private Settings currentSettings; - @Override protected Settings nodeSettings(int nodeOrdinal) { - if (currentSettings == null) { - currentSettings = DEFAULT_SETTINGS; - } - return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(currentSettings) + return Settings.builder().put(super.nodeSettings(nodeOrdinal)).put(DEFAULT_SETTINGS) .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false).build(); } - @Before - public void clearConfig() { - currentSettings = null; - } - @Override protected int numberOfShards() { return 3; @@ -145,13 +135,6 @@ protected Collection> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class); } - void configureCluster(Settings settings) { - logger.info("---> configured unicast"); - assert currentSettings == null; - // TODO: Rarely use default settings form some of these - currentSettings = Settings.builder().put(settings).build(); - } - ClusterState getNodeClusterState(String node) { return client(node).admin().cluster().prepareState().setLocal(true).get().getState(); } diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java new file mode 100644 index 0000000000000..1c4c9de5a0b96 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java @@ -0,0 +1,113 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.discovery; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.indices.store.IndicesStoreIntegrationIT; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.InternalTestCluster; +import org.elasticsearch.test.disruption.NetworkDisruption; +import org.elasticsearch.test.transport.MockTransportService; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) +public class ClusterDisruptionCleanSettingsIT extends ESIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(MockTransportService.TestPlugin.class); + } + + /** + * This test creates a scenario where a primary shard (0 replicas) relocates and is in POST_RECOVERY on the target + * node but already deleted on the source node. 
Search request should still work. + */ + public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Exception { + internalCluster().startMasterOnlyNode(); + final String node_1 = internalCluster().startDataOnlyNode(); + + logger.info("--> creating index [test] with one shard and on replica"); + assertAcked(prepareCreate("test").setSettings( + Settings.builder().put(indexSettings()) + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)) + ); + ensureGreen("test"); + + final String node_2 = internalCluster().startDataOnlyNode(); + List indexRequestBuilderList = new ArrayList<>(); + for (int i = 0; i < 100; i++) { + indexRequestBuilderList.add(client().prepareIndex().setIndex("test").setType("_doc") + .setSource("{\"int_field\":1}", XContentType.JSON)); + } + indexRandom(true, indexRequestBuilderList); + + IndicesStoreIntegrationIT.relocateAndBlockCompletion(logger, "test", 0, node_1, node_2); + // now search for the documents and see if we get a reply + assertThat(client().prepareSearch().setSize(0).get().getHits().getTotalHits(), equalTo(100L)); + } + + /** + * Tests that indices are properly deleted even if there is a master transition in between. 
+ * Test for https://github.com/elastic/elasticsearch/issues/11665 + */ + public void testIndicesDeleted() throws Exception { + final Settings settings = Settings.builder() + .put(AbstractDisruptionTestCase.DEFAULT_SETTINGS) + .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait on isolated data node + .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed + .build(); + final String idxName = "test"; + final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2, settings); + final String dataNode = internalCluster().startDataOnlyNode(settings); + ensureStableCluster(3); + assertAcked(prepareCreate("test")); + + final String masterNode1 = internalCluster().getMasterName(); + NetworkDisruption networkDisruption = + new NetworkDisruption(new NetworkDisruption.TwoPartitions(masterNode1, dataNode), new NetworkDisruption.NetworkUnresponsive()); + internalCluster().setDisruptionScheme(networkDisruption); + networkDisruption.startDisrupting(); + // We know this will time out due to the partition, we check manually below to not proceed until + // the delete has been applied to the master node and the master eligible node. + internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get(); + // Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node. 
+ assertBusy(() -> { + for (String masterNode : allMasterEligibleNodes) { + final ClusterState masterState = internalCluster().clusterService(masterNode).state(); + assertTrue("index not deleted on " + masterNode, masterState.metaData().hasIndex(idxName) == false); + } + }); + internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK); + ensureYellow(); + assertFalse(client().admin().indices().prepareExists(idxName).get().isExists()); + } +} diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java index 0f3337aa74b1e..7e4cc17f544ce 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java @@ -24,10 +24,8 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.NoShardAvailableActionException; import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; -import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.action.shard.ShardStateAction; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.Murmur3HashFunction; @@ -36,7 +34,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.indices.store.IndicesStoreIntegrationIT; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.InternalTestCluster; import org.elasticsearch.test.disruption.NetworkDisruption; @@ -356,37 +353,6 @@ public void onFailure(Exception e) { } } - /** - * This test creates a scenario where a primary shard (0 replicas) relocates and is in POST_RECOVERY on the target - * node but already 
deleted on the source node. Search request should still work. - */ - public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Exception { - // don't use DEFAULT settings (which can cause node disconnects on a slow CI machine) - configureCluster(Settings.EMPTY); - internalCluster().startMasterOnlyNode(); - final String node_1 = internalCluster().startDataOnlyNode(); - - logger.info("--> creating index [test] with one shard and on replica"); - assertAcked(prepareCreate("test").setSettings( - Settings.builder().put(indexSettings()) - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)) - ); - ensureGreen("test"); - - final String node_2 = internalCluster().startDataOnlyNode(); - List indexRequestBuilderList = new ArrayList<>(); - for (int i = 0; i < 100; i++) { - indexRequestBuilderList.add(client().prepareIndex().setIndex("test").setType("_doc") - .setSource("{\"int_field\":1}", XContentType.JSON)); - } - indexRandom(true, indexRequestBuilderList); - - IndicesStoreIntegrationIT.relocateAndBlockCompletion(logger, "test", 0, node_1, node_2); - // now search for the documents and see if we get a reply - assertThat(client().prepareSearch().setSize(0).get().getHits().getTotalHits(), equalTo(100L)); - } - public void testIndexImportedFromDataOnlyNodesIfMasterLostDataFolder() throws Exception { // test for https://github.com/elastic/elasticsearch/issues/8823 String masterNode = internalCluster().startMasterOnlyNode(Settings.EMPTY); @@ -406,42 +372,4 @@ public boolean clearData(String nodeName) { ensureGreen("index"); assertTrue(client().prepareGet("index", "_doc", "1").get().isExists()); } - - /** - * Tests that indices are properly deleted even if there is a master transition in between. 
- * Test for https://github.com/elastic/elasticsearch/issues/11665 - */ - public void testIndicesDeleted() throws Exception { - final Settings settings = Settings.builder() - .put(DEFAULT_SETTINGS) - .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait on isolated data node - .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed - .build(); - final String idxName = "test"; - configureCluster(settings); - final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2); - final String dataNode = internalCluster().startDataOnlyNode(); - ensureStableCluster(3); - assertAcked(prepareCreate("test")); - - final String masterNode1 = internalCluster().getMasterName(); - NetworkDisruption networkDisruption = - new NetworkDisruption(new TwoPartitions(masterNode1, dataNode), new NetworkDisruption.NetworkUnresponsive()); - internalCluster().setDisruptionScheme(networkDisruption); - networkDisruption.startDisrupting(); - // We know this will time out due to the partition, we check manually below to not proceed until - // the delete has been applied to the master node and the master eligible node. - internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get(); - // Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node. 
- assertBusy(() -> { - for (String masterNode : allMasterEligibleNodes) { - final ClusterState masterState = internalCluster().clusterService(masterNode).state(); - assertTrue("index not deleted on " + masterNode, masterState.metaData().hasIndex(idxName) == false); - } - }); - internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK); - ensureYellow(); - assertFalse(client().admin().indices().prepareExists(idxName).get().isExists()); - } - } diff --git a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java index 7d4fcff2f47d3..b5ca74a35465f 100644 --- a/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/SnapshotDisruptionIT.java @@ -18,6 +18,8 @@ */ package org.elasticsearch.discovery; +import java.util.Arrays; +import java.util.Collection; import org.elasticsearch.action.ActionFuture; import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse; import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse; @@ -28,10 +30,12 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeUnit; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.snapshots.SnapshotInfo; import org.elasticsearch.snapshots.SnapshotMissingException; import org.elasticsearch.snapshots.SnapshotState; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.discovery.TestZenDiscovery; import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.junit.annotations.TestLogging; @@ -41,6 +45,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import org.elasticsearch.test.transport.MockTransportService; import static 
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.Matchers.instanceOf; @@ -50,11 +55,18 @@ */ @TestLogging("org.elasticsearch.snapshot:TRACE") @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, transportClientRatio = 0) -public class SnapshotDisruptionIT extends AbstractDisruptionTestCase { +public class SnapshotDisruptionIT extends ESIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(MockTransportService.TestPlugin.class); + } @Override protected Settings nodeSettings(int nodeOrdinal) { return Settings.builder().put(super.nodeSettings(nodeOrdinal)) + .put(AbstractDisruptionTestCase.DEFAULT_SETTINGS) + .put(TestZenDiscovery.USE_MOCK_PINGS.getKey(), false) .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") .build(); } From 2146d2a0936958dc621b7c0df8b45504012e49f9 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 30 Oct 2018 18:34:24 +0100 Subject: [PATCH 5/5] CR: comments --- .../discovery/AbstractDisruptionTestCase.java | 21 +++++----- .../ClusterDisruptionCleanSettingsIT.java | 41 +------------------ .../discovery/ClusterDisruptionIT.java | 37 +++++++++++++++++ 3 files changed, 50 insertions(+), 49 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java index 063ab1ffffef4..c91c58647b9fa 100644 --- a/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java +++ b/server/src/test/java/org/elasticsearch/discovery/AbstractDisruptionTestCase.java @@ -59,16 +59,6 @@ public abstract class AbstractDisruptionTestCase extends ESIntegTestCase { static final TimeValue DISRUPTION_HEALING_OVERHEAD = TimeValue.timeValueSeconds(40); // we use 30s as timeout in many places. 
- static final Settings DEFAULT_SETTINGS = Settings.builder() - .put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly - .put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly - .put("discovery.zen.join_timeout", "10s") // still long to induce failures but to long so test won't time out - .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly - .put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this - // value and the time of disruption and does not recover immediately - // when disruption is stop. We should make sure we recover faster - // then the default of 30s, causing ensureGreen and friends to time out - .build(); @Override protected Settings nodeSettings(int nodeOrdinal) { @@ -130,6 +120,17 @@ List startCluster(int numberOfNodes) { return nodes; } + static final Settings DEFAULT_SETTINGS = Settings.builder() + .put(FaultDetection.PING_TIMEOUT_SETTING.getKey(), "1s") // for hitting simulated network failures quickly + .put(FaultDetection.PING_RETRIES_SETTING.getKey(), "1") // for hitting simulated network failures quickly + .put("discovery.zen.join_timeout", "10s") // still long to induce failures but to long so test won't time out + .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "1s") // <-- for hitting simulated network failures quickly + .put(TransportService.TCP_CONNECT_TIMEOUT.getKey(), "10s") // Network delay disruption waits for the min between this + // value and the time of disruption and does not recover immediately + // when disruption is stop. 
We should make sure we recover faster + // then the default of 30s, causing ensureGreen and friends to time out + .build(); + @Override protected Collection> nodePlugins() { return Arrays.asList(MockTransportService.TestPlugin.class); diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java index 1c4c9de5a0b96..2d0604d8d2894 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionCleanSettingsIT.java @@ -24,15 +24,12 @@ import java.util.Collection; import java.util.List; import org.elasticsearch.action.index.IndexRequestBuilder; -import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.indices.store.IndicesStoreIntegrationIT; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.test.InternalTestCluster; -import org.elasticsearch.test.disruption.NetworkDisruption; import org.elasticsearch.test.transport.MockTransportService; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -51,6 +48,8 @@ protected Collection> nodePlugins() { * node but already deleted on the source node. Search request should still work. 
*/ public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Exception { + // Don't use AbstractDisruptionTestCase.DEFAULT_SETTINGS as settings + // (which can cause node disconnects on a slow CI machine) internalCluster().startMasterOnlyNode(); final String node_1 = internalCluster().startDataOnlyNode(); @@ -74,40 +73,4 @@ public void testSearchWithRelocationAndSlowClusterStateProcessing() throws Excep // now search for the documents and see if we get a reply assertThat(client().prepareSearch().setSize(0).get().getHits().getTotalHits(), equalTo(100L)); } - - /** - * Tests that indices are properly deleted even if there is a master transition in between. - * Test for https://github.com/elastic/elasticsearch/issues/11665 - */ - public void testIndicesDeleted() throws Exception { - final Settings settings = Settings.builder() - .put(AbstractDisruptionTestCase.DEFAULT_SETTINGS) - .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait on isolated data node - .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed - .build(); - final String idxName = "test"; - final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2, settings); - final String dataNode = internalCluster().startDataOnlyNode(settings); - ensureStableCluster(3); - assertAcked(prepareCreate("test")); - - final String masterNode1 = internalCluster().getMasterName(); - NetworkDisruption networkDisruption = - new NetworkDisruption(new NetworkDisruption.TwoPartitions(masterNode1, dataNode), new NetworkDisruption.NetworkUnresponsive()); - internalCluster().setDisruptionScheme(networkDisruption); - networkDisruption.startDisrupting(); - // We know this will time out due to the partition, we check manually below to not proceed until - // the delete has been applied to the master node and the master eligible node. 
- internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get(); - // Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node. - assertBusy(() -> { - for (String masterNode : allMasterEligibleNodes) { - final ClusterState masterState = internalCluster().clusterService(masterNode).state(); - assertTrue("index not deleted on " + masterNode, masterState.metaData().hasIndex(idxName) == false); - } - }); - internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK); - ensureYellow(); - assertFalse(client().admin().indices().prepareExists(idxName).get().isExists()); - } } diff --git a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java index 7e4cc17f544ce..5dc9f537f320e 100644 --- a/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java +++ b/server/src/test/java/org/elasticsearch/discovery/ClusterDisruptionIT.java @@ -26,6 +26,7 @@ import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.action.shard.ShardStateAction; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.Murmur3HashFunction; @@ -372,4 +373,40 @@ public boolean clearData(String nodeName) { ensureGreen("index"); assertTrue(client().prepareGet("index", "_doc", "1").get().isExists()); } + + /** + * Tests that indices are properly deleted even if there is a master transition in between. 
+ * Test for https://github.com/elastic/elasticsearch/issues/11665 + */ + public void testIndicesDeleted() throws Exception { + final Settings settings = Settings.builder() + .put(DiscoverySettings.PUBLISH_TIMEOUT_SETTING.getKey(), "0s") // don't wait on isolated data node + .put(DiscoverySettings.COMMIT_TIMEOUT_SETTING.getKey(), "30s") // wait till cluster state is committed + .build(); + final String idxName = "test"; + final List allMasterEligibleNodes = internalCluster().startMasterOnlyNodes(2, settings); + final String dataNode = internalCluster().startDataOnlyNode(settings); + ensureStableCluster(3); + assertAcked(prepareCreate("test")); + + final String masterNode1 = internalCluster().getMasterName(); + NetworkDisruption networkDisruption = + new NetworkDisruption(new TwoPartitions(masterNode1, dataNode), new NetworkDisruption.NetworkUnresponsive()); + internalCluster().setDisruptionScheme(networkDisruption); + networkDisruption.startDisrupting(); + // We know this will time out due to the partition, we check manually below to not proceed until + // the delete has been applied to the master node and the master eligible node. + internalCluster().client(masterNode1).admin().indices().prepareDelete(idxName).setTimeout("0s").get(); + // Don't restart the master node until we know the index deletion has taken effect on master and the master eligible node. + assertBusy(() -> { + for (String masterNode : allMasterEligibleNodes) { + final ClusterState masterState = internalCluster().clusterService(masterNode).state(); + assertTrue("index not deleted on " + masterNode, masterState.metaData().hasIndex(idxName) == false); + } + }); + internalCluster().restartNode(masterNode1, InternalTestCluster.EMPTY_CALLBACK); + ensureYellow(); + assertFalse(client().admin().indices().prepareExists(idxName).get().isExists()); + } + }