HDDS-1178. Healthy pipeline Chill Mode Rule. by bharatviswa504 · Pull Request #518 · apache/hadoop · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

HDDS-1178. Healthy pipeline Chill Mode Rule. #518

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,15 @@ public final class HddsConfigKeys {
public static final String HDDS_SCM_CHILLMODE_THRESHOLD_PCT =
"hdds.scm.chillmode.threshold.pct";
public static final double HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT = 0.99;


// Threshold of healthy pipelines (pipelines for which all 3 datanodes have
// reported) used by the healthy-pipeline chill mode exit rule.
// NOTE(review): the key below spells "pipelie" (sic); the same spelling is
// used for this property in ozone-default.xml, so both places must be renamed
// together if the typo is corrected.
// NOTE(review): the default is written as a fraction (0.10), like
// HDDS_SCM_CHILLMODE_THRESHOLD_PCT_DEFAULT (0.99) -- confirm that every reader
// of this key treats the value as a fraction and not as a 0-100 percentage.
public static final String HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT =
"hdds.scm.chillmode.healthy.pipelie.pct";
public static final double
HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT = 0.10;

public static final String HDDS_LOCK_MAX_CONCURRENCY =
"hdds.lock.max.concurrency";
public static final int HDDS_LOCK_MAX_CONCURRENCY_DEFAULT = 100;
Expand Down
10 changes: 10 additions & 0 deletions hadoop-hdds/common/src/main/resources/ozone-default.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1315,6 +1315,16 @@
</description>
</property>

<property>
<name>hdds.scm.chillmode.healthy.pipelie.pct</name>
<value>0.10</value>
<tag>HDDS,SCM,OPERATION</tag>
<description>
Percentage of pipelines that must be healthy, i.e. have all 3 datanodes
reported for the pipeline, before SCM can exit chill mode.
</description>
</property>

<property>
<name>hdds.container.action.max.limit</name>
<value>20</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
*/
package org.apache.hadoop.hdds.scm.chillmode;

import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
import org.apache.hadoop.hdds.scm.pipeline.Pipeline;
Expand All @@ -30,33 +31,82 @@
import org.apache.hadoop.hdds.server.events.EventPublisher;

import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Class defining Chill mode exit criteria for Pipelines.
*
* This rule defines the percentage of healthy pipelines that need to be
* reported. Once chill mode exit happens, this rule ensures that writes can
* go through in the cluster.
*/
public class PipelineChillModeRule
public class HealthyPipelineChillModeRule
implements ChillModeExitRule<PipelineReportFromDatanode>,
EventHandler<PipelineReportFromDatanode> {
/** Pipeline availability.*/
private AtomicBoolean isPipelineAvailable = new AtomicBoolean(false);

private static final Logger LOG =
LoggerFactory.getLogger(HealthyPipelineChillModeRule.class);
private final PipelineManager pipelineManager;
private final SCMChillModeManager chillModeManager;
private final int healthyPipelineThresholdCount;
private int currentHealthyPipelineCount = 0;

PipelineChillModeRule(PipelineManager pipelineManager,
SCMChillModeManager manager) {
/**
 * Creates the rule and computes how many healthy pipelines must be reported
 * before the rule is satisfied.
 *
 * @param pipelineManager used to count the existing RATIS/THREE pipelines
 *                        and, later, to look up reported pipelines
 * @param manager chill mode manager this rule reports to
 * @param configuration source of the healthy pipeline threshold
 */
HealthyPipelineChillModeRule(PipelineManager pipelineManager,
    SCMChillModeManager manager, Configuration configuration) {
  this.pipelineManager = pipelineManager;
  this.chillModeManager = manager;
  double healthyPipelinesPercent =
      configuration.getDouble(HddsConfigKeys.
              HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT,
          HddsConfigKeys.
              HDDS_SCM_CHILLMODE_HEALTHY_PIPELINE_THRESHOLD_PCT_DEFAULT);

  // As we want to wait for 3 node pipelines
  int pipelineCount =
      pipelineManager.getPipelines(HddsProtos.ReplicationType.RATIS,
          HddsProtos.ReplicationFactor.THREE).size();

  // The configured value is already a fraction (default 0.10, i.e. 10%), so
  // it is applied to the pipeline count directly. Dividing it by 100 again
  // would shrink the default to 0.1% and make the threshold 1 for almost
  // every cluster.
  // This value will be zero when pipeline count is 0.
  // On a fresh installed cluster, there will be zero pipelines in the SCM
  // pipeline DB.
  healthyPipelineThresholdCount =
      (int) Math.ceil(healthyPipelinesPercent * pipelineCount);

  LOG.info(" Total pipeline count is {}, healthy pipeline " +
      "threshold count is {}", pipelineCount, healthyPipelineThresholdCount);
}

@Override
public boolean validate() {
return isPipelineAvailable.get();
// The rule is satisfied once the number of healthy pipelines counted so far
// reaches the threshold computed in the constructor.
if (currentHealthyPipelineCount >= healthyPipelineThresholdCount) {
return true;
}
return false;
}

@Override
public void process(PipelineReportFromDatanode report) {
// No need to deal with
public void process(PipelineReportFromDatanode pipelineReportFromDatanode) {
  // Counts every pipeline in the datanode's report that SCM knows about and
  // that is currently in OPEN state.
  // NOTE(review): nothing here remembers which pipeline IDs were already
  // counted, so the same pipeline appears to be counted once per report that
  // mentions it -- confirm whether de-duplication is needed.
  Preconditions.checkNotNull(pipelineReportFromDatanode);
  final PipelineReportsProto reports = pipelineReportFromDatanode.getReport();

  for (PipelineReport entry : reports.getPipelineReportList()) {
    final PipelineID id = PipelineID.getFromProtobuf(entry.getPipelineID());

    final Pipeline reported;
    try {
      reported = pipelineManager.getPipeline(id);
    } catch (PipelineNotFoundException e) {
      // The pipeline manager does not know this pipeline; ignore the entry.
      continue;
    }

    if (reported.getPipelineState() != Pipeline.PipelineState.OPEN) {
      continue;
    }
    // If the pipeline is in open state, all 3 datanodes are reported
    // for this pipeline.
    currentHealthyPipelineCount++;
  }
}

@Override
Expand All @@ -67,38 +117,22 @@ public void cleanup() {
@Override
public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
EventPublisher publisher) {
// If we are already in pipeline available state,
// skipping following check.
// If we have already reached healthy pipeline threshold, skip processing
// pipeline report from datanode.

if (validate()) {
chillModeManager.validateChillModeExitRules(publisher);
return;
}

Pipeline pipeline;
Preconditions.checkNotNull(pipelineReportFromDatanode);
PipelineReportsProto pipelineReport = pipelineReportFromDatanode
.getReport();

for (PipelineReport report : pipelineReport.getPipelineReportList()) {
PipelineID pipelineID = PipelineID
.getFromProtobuf(report.getPipelineID());
try {
pipeline = pipelineManager.getPipeline(pipelineID);
} catch (PipelineNotFoundException e) {
continue;
}
// Process pipeline report from datanode
process(pipelineReportFromDatanode);

if (pipeline.getPipelineState() == Pipeline.PipelineState.OPEN) {
// ensure there is an OPEN state pipeline and then allowed
// to exit chill mode
isPipelineAvailable.set(true);

if (chillModeManager.getInChillMode()) {
SCMChillModeManager.getLogger()
.info("SCM in chill mode. 1 Pipeline reported, 1 required.");
}
break;
}
if (chillModeManager.getInChillMode()) {
SCMChillModeManager.getLogger().info(
"SCM in chill mode. Healthy pipelines reported count is {}, " +
"required healthy pipeline reported count is {}",
currentHealthyPipelineCount, healthyPipelineThresholdCount);
}

if (validate()) {
Expand Down
<div class="js-expand-full-wrapper d-inline-block">
17 changes: 12 additions & 5 deletions ...ds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/chillmode/SCMChillModeManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ public class SCMChillModeManager implements
private Configuration config;
private static final String CONT_EXIT_RULE = "ContainerChillModeRule";
private static final String DN_EXIT_RULE = "DataNodeChillModeRule";
private static final String PIPELINE_EXIT_RULE = "PipelineChillModeRule";
private static final String HEALTHY_PIPELINE_EXIT_RULE =
"HealthyPipelineChillModeRule";

private final EventQueue eventPublisher;
private final PipelineManager pipelineManager;
Expand All @@ -83,10 +84,10 @@ public SCMChillModeManager(Configuration conf,
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
&& pipelineManager != null) {
PipelineChillModeRule rule = new PipelineChillModeRule(pipelineManager,
this);
exitRules.put(PIPELINE_EXIT_RULE, rule);
eventPublisher.addHandler(SCMEvents.PIPELINE_REPORT, rule);
HealthyPipelineChillModeRule rule = new HealthyPipelineChillModeRule(
pipelineManager, this, config);
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, rule);
eventPublisher.addHandler(SCMEvents.PROCESSED_PIPELINE_REPORT, rule);
}
emitChillModeStatus();
} else {
Expand Down Expand Up @@ -172,4 +173,10 @@ public double getCurrentContainerThreshold() {
.getCurrentContainerThreshold();
}

/**
 * Test hook: looks up the rule stored in the exit rules under
 * HEALTHY_PIPELINE_EXIT_RULE.
 *
 * @return the stored rule, cast to HealthyPipelineChillModeRule
 */
@VisibleForTesting
public HealthyPipelineChillModeRule getHealthyPipelineChillModeRule() {
  final Object registeredRule = exitRules.get(HEALTHY_PIPELINE_EXIT_RULE);
  return (HealthyPipelineChillModeRule) registeredRule;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ public final class SCMEvents {
public static final TypedEvent<PipelineReportFromDatanode> PIPELINE_REPORT =
new TypedEvent<>(PipelineReportFromDatanode.class, "Pipeline_Report");

/**
 * Fired by PipelineReportHandler after it has handled a datanode's pipeline
 * report; it is only fired while the pipeline availability check is enabled
 * and SCM is in chill mode. This event is received by
 * HealthyPipelineChillModeRule, and it carries the same
 * PipelineReportFromDatanode payload as PIPELINE_REPORT.
 */
public static final TypedEvent<PipelineReportFromDatanode>
PROCESSED_PIPELINE_REPORT = new TypedEvent<>(
PipelineReportFromDatanode.class, "Processed_Pipeline_Report");

/**
* PipelineActions are sent by Datanode. This event is received by
* SCMDatanodeHeartbeatDispatcher and PIPELINE_ACTIONS event is generated.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@

import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.protocol.proto
.StorageContainerDatanodeProtocolProtos.PipelineReportsProto;
import org.apache.hadoop.hdds.scm.chillmode.SCMChillModeManager;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.server
.SCMDatanodeHeartbeatDispatcher.PipelineReportFromDatanode;
import org.apache.hadoop.hdds.server.events.EventHandler;
Expand All @@ -33,6 +36,7 @@
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Objects;

/**
* Handles Pipeline Reports from datanode.
Expand All @@ -44,12 +48,21 @@ public class PipelineReportHandler implements
.getLogger(PipelineReportHandler.class);
private final PipelineManager pipelineManager;
private final Configuration conf;
private final SCMChillModeManager scmChillModeManager;
private final boolean pipelineAvailabilityCheck;

public PipelineReportHandler(PipelineManager pipelineManager,
public PipelineReportHandler(SCMChillModeManager scmChillModeManager,
PipelineManager pipelineManager,
Configuration conf) {
Preconditions.checkNotNull(pipelineManager);
Objects.requireNonNull(scmChillModeManager);
this.scmChillModeManager = scmChillModeManager;
this.pipelineManager = pipelineManager;
this.conf = conf;
this.pipelineAvailabilityCheck = conf.getBoolean(
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK,
HddsConfigKeys.HDDS_SCM_CHILLMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT);

}

@Override
Expand All @@ -70,6 +83,11 @@ public void onMessage(PipelineReportFromDatanode pipelineReportFromDatanode,
report, dn, e);
}
}
if (pipelineAvailabilityCheck && scmChillModeManager.getInChillMode()) {
publisher.fireEvent(SCMEvents.PROCESSED_PIPELINE_REPORT,
pipelineReportFromDatanode);
}

}

private void processPipelineReport(PipelineReport report, DatanodeDetails dn)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public StorageContainerManager(OzoneConfiguration conf,
NodeReportHandler nodeReportHandler =
new NodeReportHandler(scmNodeManager);
PipelineReportHandler pipelineReportHandler =
new PipelineReportHandler(pipelineManager, conf);
new PipelineReportHandler(scmChillModeManager, pipelineManager, conf);
CommandStatusReportHandler cmdStatusReportHandler =
new CommandStatusReportHandler();

Expand Down
Loading
0