From e2290ae973908b6d269cc2e85f4a4c45e3fc153a Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 25 Feb 2019 17:17:15 -0800 Subject: [PATCH 1/4] HDDS-1178. Healthy pipeline Chill Mode Rule. --- .../apache/hadoop/hdds/HddsConfigKeys.java | 9 ++ ...java => HealthyPipelineChillModeRule.java} | 102 +++++++----- .../scm/chillmode/SCMChillModeManager.java | 15 +- .../TestHealthyPipelineChillModeRule.java | 152 ++++++++++++++++++ 4 files changed, 238 insertions(+), 40 deletions(-) rename hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/{PipelineChillModeRule.java => HealthyPipelineChillModeRule.java} (54%) create mode 100644 hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java index 29132302dcc72..007104f7563b9 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsConfigKeys.java @@ -80,6 +80,15 @@ public final class HddsConfigKeys { public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT = "hdds.scm.chillmode.threshold.pct"; public static final double HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT = 0.99; + + + // percentage of healthy pipelines, where all 3 datanodes are reported in the + // pipeline. + public static final String HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT = + "hdds.scm.chillmode.healthy.pipelie.pct"; + public static final double + HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT = 0.10; + public static final String HDDS_LOCK_MAX_CONCURRENCY = "hdds.lock.max.concurrency"; public static final int HDDS_LOCK_MAX_CONCURRENCY_DEFAULT = 100; diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/PipelineChillModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java similarity index 54% rename from hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/PipelineChillModeRule.java rename to hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java index f9a6e59ba26ef..8392fe0ba5be5 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/PipelineChillModeRule.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdds.scm.chillmode; -import java.util.concurrent.atomic.AtomicBoolean; - +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -30,33 +30,79 @@ import org.apache.hadoop.hdds.server.events.EventPublisher; import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class defining Chill mode exit criteria for Pipelines. + * + * This rule defines percentage of healthy pipelines need to be reported. + * Once chill mode exit happens, this rules take care of writes can go + * through in a cluster. 
*/ -public class PipelineChillModeRule +public class HealthyPipelineChillModeRule implements ChillModeExitRule, EventHandler { - /** Pipeline availability.*/ - private AtomicBoolean isPipelineAvailable = new AtomicBoolean(false); + private static final Logger LOG = + LoggerFactory.getLogger(HealthyPipelineChillModeRule.class); private final PipelineManager pipelineManager; private final SCMChillModeManager chillModeManager; + private final int healthyPipelineThresholdCount; + private int currentHealthyPipelineCount = 0; - PipelineChillModeRule(PipelineManager pipelineManager, - SCMChillModeManager manager) { + HealthyPipelineChillModeRule(PipelineManager pipelineManager, + SCMChillModeManager manager, Configuration configuration) { this.pipelineManager = pipelineManager; this.chillModeManager = manager; + double healthyPipelinesPercent = + configuration.getDouble(HddsConfigKeys. + HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT, + HddsConfigKeys. + HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT); + + int pipelineCount = pipelineManager.getPipelines().size(); + + // This value will be zero when pipeline count is 0. + // On a fresh installed cluster, there will be zero pipelines in the SCM + // pipeline DB. + healthyPipelineThresholdCount = + (int) Math.ceil((healthyPipelinesPercent / 100) * pipelineCount); + + LOG.info(" Total pipeline count is {}, healthy pipeline " + + "threshold count is {}", pipelineCount, healthyPipelineThresholdCount); } @Override public boolean validate() { - return isPipelineAvailable.get(); + if (currentHealthyPipelineCount >= healthyPipelineThresholdCount) { + return true; + } + return false; } @Override - public void process(PipelineReportFromDatanode report) { - // No need to deal with + public void process(PipelineReportFromDatanode pipelineReportFromDatanode) { + Pipeline pipeline; + Preconditions.checkNotNull(pipelineReportFromDatanode); + PipelineReportsProto pipelineReport = + pipelineReportFromDatanode.getReport(); + + for (PipelineReport report : pipelineReport.getPipelineReportList()) { + PipelineID pipelineID = PipelineID + .getFromProtobuf(report.getPipelineID()); + try { + pipeline = pipelineManager.getPipeline(pipelineID); + } catch (PipelineNotFoundException e) { + continue; + } + + if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) { + // If the pipeline is open state mean, all 3 datanodes are reported + // for this pipeline. + currentHealthyPipelineCount++; + } + } } @Override @@ -67,38 +113,22 @@ public void cleanup() { @Override public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode, EventPublisher publisher) { - // If we are already in pipeline available state, - // skipping following check. + // If we have already reached healthy pipeline threshold, skip processing + // pipeline report from datanode. 
+ if (validate()) { chillModeManager.validateChillModeExitRules(publisher); return; } - Pipeline pipeline; - Preconditions.checkNotNull(pipelineReportFromDatanode); - PipelineReportsProto pipelineReport = pipelineReportFromDatanode - .getReport(); - - for (PipelineReport report : pipelineReport.getPipelineReportList()) { - PipelineID pipelineID = PipelineID - .getFromProtobuf(report.getPipelineID()); - try { - pipeline = pipelineManager.getPipeline(pipelineID); - } catch (PipelineNotFoundException e) { - continue; - } + // Process pipeline report from datanode + process(pipelineReportFromDatanode); - if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) { - // ensure there is an OPEN state pipeline and then allowed - // to exit chill mode - isPipelineAvailable.set(true); - - if (chillModeManager.getInChillMode()) { - SCMChillModeManager.getLogger() - .info("SCM in chill mode. 1 Pipeline reported, 1 required."); - } - break; - } + if (chillModeManager.getInChillMode()) { + SCMChillModeManager.getLogger().info( + "SCM in chill mode. Healthy pipelines reported count is {}, " + + "required healthy pipeline reported count is {}", + currentHealthyPipelineCount, healthyPipelineThresholdCount); } if (validate()) { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java index f80f63114d5cd..d9095fe09b175 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java @@ -60,7 +60,8 @@ public class SCMChillModeManager implements private Configuration config; private static final String CONT_EXIT_RULE = "ContainerChillModeRule"; private static final String DN_EXIT_RULE = "DataNodeChillModeRule"; - private static final String PIPELINE_EXIT_RULE = "PipelineChillModeRule"; + private static final String HEALTHY_PIPELINE_EXIT_RULE = + "HealthyPipelineChillModeRule"; private final EventQueue eventPublisher; private final PipelineManager pipelineManager; @@ -83,9 +84,9 @@ public SCMChillModeManager(Configuration conf, HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT) && pipelineManager != null) { - PipelineChillModeRule rule = new PipelineChillModeRule(pipelineManager, - this); - exitRules.put(PIPELINE_EXIT_RULE, rule); + HealthyPipelineChillModeRule rule = new HealthyPipelineChillModeRule( + pipelineManager, this, config); + exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, rule); eventPublisher.addHandler(SCMEvents.PIPELINE_REPORT, rule); } emitChillModeStatus(); @@ -172,4 +173,10 @@ public double getCurrentContainerThreshold() { .getCurrentContainerThreshold(); } + @VisibleForTesting + public HealthyPipelineChillModeRule getHealthyPipelineChillModeRule() { + return (HealthyPipelineChillModeRule) + exitRules.get(HEALTHY_PIPELINE_EXIT_RULE); + } + } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java new file mode 100644 index 0000000000000..ee0141163c073 --- /dev/null +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java @@ -0,0 +1,152 @@ +package 
org.apache.hadoop.hdds.scm.chillmode; + +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.PipelineReport; +import org.apache.hadoop.hdds.protocol.proto. + StorageContainerDatanodeProtocolProtos.PipelineReportsProto; +import org.apache.hadoop.hdds.scm.HddsTestUtils; +import org.apache.hadoop.hdds.scm.container.ContainerInfo; +import org.apache.hadoop.hdds.scm.container.MockNodeManager; +import org.apache.hadoop.hdds.scm.events.SCMEvents; +import org.apache.hadoop.hdds.scm.pipeline.Pipeline; +import org.apache.hadoop.hdds.scm.pipeline.PipelineManager; +import org.apache.hadoop.hdds.scm.pipeline.SCMPipelineManager; +import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher; +import org.apache.hadoop.hdds.server.events.EventQueue; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.UUID; + +/** + * This class tests HealthyPipelineChillMode rule. + */ +public class TestHealthyPipelineChillModeRule { + + @Test + public void testHealthyPipelineChillModeRuleWithNoPipelines() + throws Exception { + + String storageDir = GenericTestUtils.getTempPath( + TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID()); + try { + EventQueue eventQueue = new EventQueue(); + List containers = new ArrayList<>(); + containers.addAll(HddsTestUtils.getContainerInfo(1)); + + OzoneConfiguration config = new OzoneConfiguration(); + MockNodeManager nodeManager = new MockNodeManager(true, 0); + config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir); + // enable pipeline check + config.setBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true); + + + PipelineManager pipelineManager = new SCMPipelineManager(config, + nodeManager, eventQueue); + SCMChillModeManager scmChillModeManager = new SCMChillModeManager( + config, containers, pipelineManager, eventQueue); + + HealthyPipelineChillModeRule healthyPipelineChillModeRule = + scmChillModeManager.getHealthyPipelineChillModeRule(); + + // This should be immediately satisfied, as no pipelines are there yet. + Assert.assertTrue(healthyPipelineChillModeRule.validate()); + } finally { + FileUtil.fullyDelete(new File(storageDir)); + } + + } + + + @Test + public void testHealthyPipelineChillModeRuleWithPipelines() throws Exception { + + String storageDir = GenericTestUtils.getTempPath( + TestHealthyPipelineChillModeRule.class.getName() + UUID.randomUUID()); + + try { + EventQueue eventQueue = new EventQueue(); + List containers = new ArrayList<>(); + containers.addAll(HddsTestUtils.getContainerInfo(1)); + + OzoneConfiguration config = new OzoneConfiguration(); + + // In Mock Node Manager, first 8 nodes are healthy, next 2 nodes are + // stale and last one is dead, and this repeats. So for a 12 node, 9 + // healthy, 2 stale and one dead. 
+ MockNodeManager nodeManager = new MockNodeManager(true, 12); + config.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir); + // enable pipeline check + config.setBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, true); + + + PipelineManager pipelineManager = new SCMPipelineManager(config, + nodeManager, eventQueue); + + // Create 3 pipelines + Pipeline pipeline1 = + pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + Pipeline pipeline2 = + pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + Pipeline pipeline3 = + pipelineManager.createPipeline(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE); + + + SCMChillModeManager scmChillModeManager = new SCMChillModeManager( + config, containers, pipelineManager, eventQueue); + + HealthyPipelineChillModeRule healthyPipelineChillModeRule = + scmChillModeManager.getHealthyPipelineChillModeRule(); + + + // No datanodes have sent pipelinereport from datanode + Assert.assertFalse(healthyPipelineChillModeRule.validate()); + + // Fire pipeline report from all datanodes in first pipeline, as here we + // have 3 pipelines, 10% is 0.3, when doing ceil it is 1. So, we should + // validate should return true after fire pipeline event + + + //Here testing with out pipelinereport handler, so not moving created + // pipelines to allocated state, as pipelines changing to healthy is + // handled by pipeline report handler. So, leaving pipeline's in pipeline + // manager in open state for test case simplicity. + + firePipelineEvent(pipeline1, eventQueue); + GenericTestUtils.waitFor(() -> healthyPipelineChillModeRule.validate(), + 1000, 5000); + } finally { + FileUtil.fullyDelete(new File(storageDir)); + } + + } + + + private void firePipelineEvent(Pipeline pipeline, EventQueue eventQueue) { + PipelineReportsProto.Builder reportBuilder = PipelineReportsProto + .newBuilder(); + + reportBuilder.addPipelineReport(PipelineReport.newBuilder() + .setPipelineID(pipeline.getId().getProtobuf())); + + // Here no need to fire event from 3 nodes, as already pipeline is in + // open state, but doing it. 
+ eventQueue.fireEvent(SCMEvents.PIPELINE_REPORT, + new SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode( + pipeline.getNodes().get(0), reportBuilder.build())); + } + +} From ea21e25e12a5e5db384bc8c3c6658639294f06e6 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 25 Feb 2019 22:18:02 -0800 Subject: [PATCH 2/4] address review comments --- .../scm/chillmode/SCMChillModeManager.java | 2 +- .../hadoop/hdds/scm/events/SCMEvents.java | 4 ++++ .../scm/pipeline/PipelineReportHandler.java | 20 ++++++++++++++++++- .../scm/server/StorageContainerManager.java | 2 +- .../TestHealthyPipelineChillModeRule.java | 2 +- .../hdds/scm/pipeline/TestPipelineClose.java | 10 ++++++++-- .../scm/pipeline/TestSCMPipelineManager.java | 10 ++++++++-- 7 files changed, 42 insertions(+), 8 deletions(-) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java index d9095fe09b175..ba79af729c647 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java @@ -87,7 +87,7 @@ public SCMChillModeManager(Configuration conf, HealthyPipelineChillModeRule rule = new HealthyPipelineChillModeRule( pipelineManager, this, config); exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, rule); - eventPublisher.addHandler(SCMEvents.PIPELINE_REPORT, rule); + eventPublisher.addHandler(SCMEvents.PROCESSED_PIPELINE_REPORT, rule); } emitChillModeStatus(); } else { diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java index 9979783b53e18..678896768c216 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java @@ -103,6 +103,10 @@ public final class SCMEvents { public static final TypedEvent PIPELINE_REPORT = new TypedEvent<>(PipelineReportFromDatanode.class, "Pipeline_Report"); + public static final TypedEvent + PROCESSED_PIPELINE_REPORT = new TypedEvent<>( + PipelineReportFromDatanode.class, "Processed_Pipeline_Report"); + /** * PipelineActions are sent by Datanode. This event is received by * SCMDatanodeHeartbeatDispatcher and PIPELINE_ACTIONS event is generated. 
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineReportHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineReportHandler.java index daffe1e611b47..9c914b077b203 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineReportHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/pipeline/PipelineReportHandler.java @@ -20,11 +20,14 @@ import com.google.common.base.Preconditions; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdds.HddsConfigKeys; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReport; import org.apache.hadoop.hdds.protocol.proto .StorageContainerDatanodeProtocolProtos.PipelineReportsProto; +import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager; +import org.apache.hadoop.hdds.scm.events.SCMEvents; import org.apache.hadoop.hdds.scm.server .SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode; import org.apache.hadoop.hdds.server.events.EventHandler; @@ -33,6 +36,7 @@ import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.Objects; /** * Handles Pipeline Reports from datanode. @@ -44,12 +48,21 @@ public class PipelineReportHandler implements .getLogger(PipelineReportHandler.class); private final PipelineManager pipelineManager; private final Configuration conf; + private final SCMChillModeManager scmChillModeManager; + private final boolean pipelineAvailabilityCheck; - public PipelineReportHandler(PipelineManager pipelineManager, + public PipelineReportHandler(SCMChillModeManager scmChillModeManager, + PipelineManager pipelineManager, Configuration conf) { Preconditions.checkNotNull(pipelineManager); + Objects.requireNonNull(scmChillModeManager); + this.scmChillModeManager = scmChillModeManager; this.pipelineManager = pipelineManager; this.conf = conf; + this.pipelineAvailabilityCheck = conf.getBoolean( + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK, + HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT); + } @Override @@ -70,6 +83,11 @@ public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode, report, dn, e); } } + if (pipelineAvailabilityCheck && scmChillModeManager.getInChillMode()) { + publisher.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT, + pipelineReportFromDatanode); + } + } private void processPipelineReport(PipelineReport report, DatanodeDetails dn) diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java index 9bf47e6072852..fc93235b7b0e3 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/StorageContainerManager.java @@ -289,7 +289,7 @@ public StorageContainerManager(OzoneConfiguration conf, NodeReportHandler nodeReportHandler = new NodeReportHandler(scmNodeManager); PipelineReportHandler pipelineReportHandler = - new PipelineReportHandler(pipelineManager, conf); + new PipelineReportHandler(scmChillModeManager, pipelineManager, conf); CommandStatusReportHandler cmdStatusReportHandler = new CommandStatusReportHandler(); diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java index ee0141163c073..e2920fbc3a0d8 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java @@ -144,7 +144,7 @@ private void firePipelineEvent(Pipeline pipeline, EventQueue eventQueue) { // Here no need to fire event from 3 nodes, as already pipeline is in // open state, but doing it. - eventQueue.fireEvent(SCMEvents.PIPELINE_REPORT, + eventQueue.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT, new SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode( pipeline.getNodes().get(0), reportBuilder.build())); } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java index 1ce893a918b29..e855d2ce16769 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestPipelineClose.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; import org.apache.hadoop.hdds.scm.ScmConfigKeys; import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.ContainerInfo; import org.apache.hadoop.hdds.scm.container.ContainerManager; @@ -40,6 +41,7 @@ import org.junit.Test; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -192,11 +194,15 @@ public void testPipelineCloseWithPipelineReport() throws IOException { for (DatanodeDetails dn : pipeline.getNodes()) { PipelineReportFromDatanode pipelineReport = TestUtils.getPipelineReportFromDatanode(dn, pipeline.getId()); + EventQueue eventQueue = new EventQueue(); + SCMChillModeManager scmChillModeManager = + new SCMChillModeManager(new OzoneConfiguration(), + new ArrayList<>(), pipelineManager, eventQueue); PipelineReportHandler pipelineReportHandler = - new PipelineReportHandler(pipelineManager, conf); + new PipelineReportHandler(scmChillModeManager, pipelineManager, conf); // on receiving pipeline report for the pipeline, pipeline report handler // should destroy the pipeline for the dn - pipelineReportHandler.onMessage(pipelineReport, new EventQueue()); + pipelineReportHandler.onMessage(pipelineReport, eventQueue); } OzoneContainer ozoneContainer = diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java index ad53015e175e3..c871a565eee4e 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/pipeline/TestSCMPipelineManager.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import 
org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.TestUtils; +import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.container.MockNodeManager; import org.apache.hadoop.hdds.scm.server.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode; @@ -37,6 +38,7 @@ import java.io.File; import java.io.IOException; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -131,8 +133,12 @@ public void testRemovePipeline() throws IOException { @Test public void testPipelineReport() throws IOException { + EventQueue eventQueue = new EventQueue(); PipelineManager pipelineManager = - new SCMPipelineManager(conf, nodeManager, new EventQueue()); + new SCMPipelineManager(conf, nodeManager, eventQueue); + SCMChillModeManager scmChillModeManager = + new SCMChillModeManager(new OzoneConfiguration(), + new ArrayList<>(), pipelineManager, eventQueue); // create a pipeline in allocated state with no dns yet reported Pipeline pipeline = pipelineManager @@ -145,7 +151,7 @@ public void testPipelineReport() throws IOException { // get pipeline report from each dn in the pipeline PipelineReportHandler pipelineReportHandler = - new PipelineReportHandler(pipelineManager, conf); + new PipelineReportHandler(scmChillModeManager, pipelineManager, conf); for (DatanodeDetails dn: pipeline.getNodes()) { PipelineReportFromDatanode pipelineReportFromDatanode = TestUtils.getPipelineReportFromDatanode(dn, pipeline.getId()); From bebe0040ca0cb104097ecce1f19a175056b8b559 Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Mon, 25 Feb 2019 22:23:29 -0800 Subject: [PATCH 3/4] fix asf error --- .../TestHealthyPipelineChillModeRule.java | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java index e2920fbc3a0d8..adfa73f449430 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/chillmode/TestHealthyPipelineChillModeRule.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.  See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.  The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.  You may obtain a copy of the License at + * + *      http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.hadoop.hdds.scm.chillmode; import org.apache.hadoop.fs.FileUtil; From db0361c38af3b972492d14f2dc94f9681609710f Mon Sep 17 00:00:00 2001 From: Bharat Viswanadham Date: Tue, 26 Feb 2019 15:33:28 -0800 Subject: [PATCH 4/4] Fix test failure and added little more changes --- .../common/src/main/resources/ozone-default.xml | 10 ++++++++++ .../scm/chillmode/HealthyPipelineChillModeRule.java | 6 +++++- .../org/apache/hadoop/hdds/scm/events/SCMEvents.java | 4 ++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml index c6834e6ca8396..b114daa2999b3 100644 --- a/hadoop-hdds/common/src/main/resources/ozone-default.xml +++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml @@ -1315,6 +1315,16 @@ + + hdds.scm.chillmode.healthy.pipelie.pct + 0.10 + HDDS,SCM,OPERATION + + Percentage of healthy pipelines, where all 3 datanodes are reported in the + pipeline. + + + hdds.container.action.max.limit 20 diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java index 8392fe0ba5be5..07088ca074e3e 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/HealthyPipelineChillModeRule.java @@ -19,6 +19,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdds.HddsConfigKeys; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -61,7 +62,10 @@ public class HealthyPipelineChillModeRule HddsConfigKeys. HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT); - int pipelineCount = pipelineManager.getPipelines().size(); + // As we want to wait for 3 node pipelines + int pipelineCount = + pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS, + HddsProtos.ReplicationFactor.THREE).size(); // This value will be zero when pipeline count is 0. // On a fresh installed cluster, there will be zero pipelines in the SCM diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java index 678896768c216..0eb8e1861a4d7 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/events/SCMEvents.java @@ -103,6 +103,10 @@ public final class SCMEvents { public static final TypedEvent PIPELINE_REPORT = new TypedEvent<>(PipelineReportFromDatanode.class, "Pipeline_Report"); + /** + * PipelineReport processed by pipeline report handler. This event is + * received by HealthyPipelineChillModeRule. + */ public static final TypedEvent PROCESSED_PIPELINE_REPORT = new TypedEvent<>( PipelineReportFromDatanode.class, "Processed_Pipeline_Report");