From b8fb6f02628ab92de91c03161775d8a9c0d65dd9 Mon Sep 17 00:00:00 2001 From: tom lee Date: Sun, 26 Dec 2021 12:30:24 +0800 Subject: [PATCH 1/4] HDFS-16396. Reconfig slow peer parameters for datanode --- .../hdfs/server/datanode/BPServiceActor.java | 2 +- .../hdfs/server/datanode/BlockReceiver.java | 4 +- .../hadoop/hdfs/server/datanode/DNConf.java | 6 +- .../hadoop/hdfs/server/datanode/DataNode.java | 70 +++++++++++++++- .../hdfs/server/datanode/DataXceiver.java | 2 +- .../datanode/metrics/DataNodePeerMetrics.java | 42 ++++++++-- .../datanode/TestDataNodeReconfiguration.java | 81 +++++++++++++++++++ .../hadoop/hdfs/tools/TestDFSAdmin.java | 2 +- 8 files changed, 194 insertions(+), 15 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index f1fc51eb1d14d..bf284d1e1dd96 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -552,7 +552,7 @@ HeartbeatResponse sendHeartBeat(boolean requestBlockReportLease) volumeFailureSummary.getFailedStorageLocations().length : 0; final boolean outliersReportDue = scheduler.isOutliersReportDue(now); final SlowPeerReports slowPeers = - outliersReportDue && dn.getPeerMetrics() != null ? + outliersReportDue && dnConf.peerStatsEnabled && dn.getPeerMetrics() != null ? SlowPeerReports.create(dn.getPeerMetrics().getOutliers()) : SlowPeerReports.EMPTY_REPORT; final SlowDiskReports slowDisks = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index 3c751bfc94857..62bc66080ce53 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -887,7 +887,7 @@ duration, datanodeSlowLogThresholdMs, getVolumeBaseUri(), */ private void trackSendPacketToLastNodeInPipeline(final long elapsedMs) { final DataNodePeerMetrics peerMetrics = datanode.getPeerMetrics(); - if (peerMetrics != null && isPenultimateNode) { + if (datanode.getDnConf().peerStatsEnabled && peerMetrics != null && isPenultimateNode) { peerMetrics.addSendPacketDownstream(mirrorNameForMetrics, elapsedMs); } } @@ -1107,7 +1107,7 @@ private void initPerfMonitoring(DatanodeInfo[] downstreams) { if (downstreams != null && downstreams.length > 0) { downstreamDNs = downstreams; isPenultimateNode = (downstreams.length == 1); - if (isPenultimateNode && datanode.getPeerMetrics() != null) { + if (isPenultimateNode && datanode.getDnConf().peerStatsEnabled) { mirrorNameForMetrics = (downstreams[0].getInfoSecurePort() != 0 ? downstreams[0].getInfoSecureAddr() : downstreams[0].getInfoAddr()); LOG.debug("Will collect peer metrics for downstream node {}", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java index 40d0df369912e..563fbde2719de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DNConf.java @@ -108,7 +108,7 @@ public class DNConf { private final long lifelineIntervalMs; volatile long blockReportInterval; volatile long blockReportSplitThreshold; - final boolean peerStatsEnabled; + volatile boolean peerStatsEnabled; final boolean diskStatsEnabled; final long outliersReportIntervalMs; final long ibrInterval; @@ -507,4 +507,8 @@ void setInitBRDelayMs(String delayMs) { dn.getConf().set(DFS_BLOCKREPORT_INITIAL_DELAY_KEY, delayMs); initBlockReportDelay(); } + + void setPeerStatsEnabled(boolean enablePeerStats) { + peerStatsEnabled = enablePeerStats; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 5f4db85aa9db4..6087d8d753cf1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -46,11 +46,19 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_LOCKED_MEMORY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_NETWORK_COUNTS_CACHE_MAX_SIZE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_NETWORK_COUNTS_CACHE_MAX_SIZE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OOB_TIMEOUT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_OOB_TIMEOUT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PLUGINS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; @@ -315,7 +323,11 @@ public class DataNode extends ReconfigurableBase DFS_BLOCKREPORT_SPLIT_THRESHOLD_KEY, DFS_BLOCKREPORT_INITIAL_DELAY_KEY, DFS_DATANODE_MAX_RECEIVER_THREADS_KEY, - DFS_CACHEREPORT_INTERVAL_MSEC_KEY)); + DFS_CACHEREPORT_INTERVAL_MSEC_KEY, + DFS_DATANODE_PEER_STATS_ENABLED_KEY, + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY)); public static final Log METRICS_LOG = LogFactory.getLog("DataNodeMetricsLog"); @@ -357,7 +369,7 @@ public static InetSocketAddress createSocketAddr(String target) { DataNodeMetrics metrics; @Nullable - private DataNodePeerMetrics peerMetrics; + private volatile DataNodePeerMetrics peerMetrics; private DataNodeDiskMetrics diskMetrics; private InetSocketAddress streamingAddr; @@ -634,6 +646,11 @@ public String reconfigurePropertyImpl(String property, String newVal) return reconfDataXceiverParameters(property, newVal); case DFS_CACHEREPORT_INTERVAL_MSEC_KEY: return reconfCacheReportParameters(property, newVal); + case DFS_DATANODE_PEER_STATS_ENABLED_KEY: + case DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY: + case DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY: + case DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY: + return reconfSlowPeerParameters(property, newVal); default: break; } @@ -713,6 +730,53 @@ private String reconfBlockReportParameters(String property, String newVal) } } + private String reconfSlowPeerParameters(String property, String newVal) + throws ReconfigurationException { + String result = null; + try { + LOG.info("Reconfiguring {} to {}", property, newVal); + if (property.equals(DFS_DATANODE_PEER_STATS_ENABLED_KEY)) { + Preconditions.checkNotNull(dnConf, "DNConf has not been initialized."); + if (newVal != null && !newVal.equalsIgnoreCase("true") + && !newVal.equalsIgnoreCase("false")) { + throw new IllegalArgumentException("Not a valid Boolean value for " + property + + " in reconfSlowPeerParameters"); + } + boolean enable = (newVal == null ? DFS_DATANODE_PEER_STATS_ENABLED_DEFAULT : + Boolean.parseBoolean(newVal)); + result = Boolean.toString(enable); + dnConf.setPeerStatsEnabled(enable); + if (enable) { + // Create if it doesn't exist, overwrite if it does. + peerMetrics = DataNodePeerMetrics.create(getDisplayName(), getConf()); + } + } else if (property.equals(DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY)) { + Preconditions.checkNotNull(peerMetrics, "DataNode peer stats may be disabled."); + long minNodes = (newVal == null ? DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(minNodes); + peerMetrics.setMinOutlierDetectionNodes(minNodes); + } else if (property.equals(DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY)) { + Preconditions.checkNotNull(peerMetrics, "DataNode peer stats may be disabled."); + long threshold = (newVal == null ? DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(threshold); + peerMetrics.setLowThresholdMs(threshold); + } else if (property.equals(DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY)) { + Preconditions.checkNotNull(peerMetrics, "DataNode peer stats may be disabled."); + long minSamples = (newVal == null ? + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT : + Long.parseLong(newVal)); + result = Long.toString(minSamples); + peerMetrics.setMinOutlierDetectionSamples(minSamples); + } + LOG.info("RECONFIGURE* changed {} to {}", property, newVal); + return result; + } catch (IllegalArgumentException e) { + throw new ReconfigurationException(property, newVal, getConf().get(property), e); + } + } + /** * Get a list of the keys of the re-configurable properties in configuration. */ @@ -3872,7 +3936,7 @@ void setBlockScanner(BlockScanner blockScanner) { @Override // DataNodeMXBean public String getSendPacketDownstreamAvgInfo() { - return peerMetrics != null ? + return dnConf.peerStatsEnabled && peerMetrics != null ? peerMetrics.dumpSendPacketDownstreamAvgInfoAsJson() : null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 46acd5a647aa8..a02acc7696fb2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -341,7 +341,7 @@ public void run() { * the thread dies away. */ private void collectThreadLocalStates() { - if (datanode.getPeerMetrics() != null) { + if (datanode.getDnConf().peerStatsEnabled && datanode.getPeerMetrics() != null) { datanode.getPeerMetrics().collectThreadLocalStates(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java index 750e53db13bf7..d493d0140ad05 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java @@ -25,14 +25,17 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.metrics2.MetricsJsonBuilder; import org.apache.hadoop.metrics2.lib.MutableRollingAverages; +import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Map; import java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; /** * This class maintains DataNode peer metrics (e.g. numOps, AvgTime, etc.) for @@ -57,15 +60,15 @@ public class DataNodePeerMetrics { * for outlier detection. If the number of samples is below this then * outlier detection is skipped. */ - private final long minOutlierDetectionSamples; + private volatile long minOutlierDetectionSamples; /** * Threshold in milliseconds below which a DataNode is definitely not slow. */ - private final long lowThresholdMs; + private volatile long lowThresholdMs; /** * Minimum number of nodes to run outlier detection. */ - private final long minOutlierDetectionNodes; + private volatile long minOutlierDetectionNodes; public DataNodePeerMetrics(final String name, Configuration conf) { this.name = name; @@ -73,10 +76,10 @@ public DataNodePeerMetrics(final String name, Configuration conf) { DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY, DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT); lowThresholdMs = - conf.getLong(DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, + conf.getLong(DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT); minOutlierDetectionNodes = - conf.getLong(DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, + conf.getLong(DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT); this.slowNodeDetector = new OutlierDetector(minOutlierDetectionNodes, lowThresholdMs); @@ -87,7 +90,7 @@ public String name() { return name; } - long getMinOutlierDetectionSamples() { + public long getMinOutlierDetectionSamples() { return minOutlierDetectionSamples; } @@ -150,4 +153,31 @@ public Map getOutliers() { public MutableRollingAverages getSendPacketDownstreamRollingAverages() { return sendPacketDownstreamRollingAverages; } + + public void setMinOutlierDetectionNodes(long minNodes) { + Preconditions.checkArgument(minNodes > 0, + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY + " should be larger than 0"); + minOutlierDetectionNodes = minNodes; + } + + public long getMinOutlierDetectionNodes() { + return minOutlierDetectionNodes; + } + + public void setLowThresholdMs(long thresholdMs) { + Preconditions.checkArgument(thresholdMs > 0, + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY + " should be larger than 0"); + lowThresholdMs = thresholdMs; + } + + public long getLowThresholdMs() { + return lowThresholdMs; + } + + public void setMinOutlierDetectionSamples(long minSamples) { + Preconditions.checkArgument(minSamples > 0, + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY + + " should be larger than 0"); + minOutlierDetectionSamples = minSamples; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java index 75f1ee9bbb500..8cea9e7632027 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java @@ -28,7 +28,13 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -84,6 +90,7 @@ public void tearDown() throws Exception { private void startDFSCluster(int numNameNodes, int numDataNodes) throws IOException { Configuration conf = new Configuration(); + conf.setBoolean(DFS_DATANODE_PEER_STATS_ENABLED_KEY, true); MiniDFSNNTopology nnTopology = MiniDFSNNTopology .simpleFederatedTopology(numNameNodes); @@ -467,4 +474,78 @@ public void testCacheReportReconfiguration() dn.getConf().get(DFS_CACHEREPORT_INTERVAL_MSEC_KEY)); } } + + @Test + public void testSlowPeerParameters() + throws ReconfigurationException { + String[] slowPeersParameters = { + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY}; + + for (int i = 0; i < NUM_DATA_NODE; i++) { + DataNode dn = cluster.getDataNodes().get(i); + + // Try invalid values. + try { + dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "text"); + } catch (ReconfigurationException expected) { + assertEquals("Could not change property dfs.datanode.peer.stats.enabled " + + "from 'true' to 'text'", expected.getMessage()); + } + + for (String parameter : slowPeersParameters) { + try { + dn.reconfigureProperty(parameter, "text"); + fail("ReconfigurationException expected"); + } catch (ReconfigurationException expected) { + assertTrue("expecting NumberFormatException", + expected.getCause() instanceof NumberFormatException); + } + + try { + dn.reconfigureProperty(parameter, String.valueOf(-1)); + fail("ReconfigurationException expected"); + } catch (ReconfigurationException expected) { + assertTrue("expecting IllegalArgumentException", + expected.getCause() instanceof IllegalArgumentException); + } + } + + // Change and verify properties. + dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "false"); + assertFalse(dn.getDnConf().peerStatsEnabled); + + // Reset DFS_DATANODE_PEER_STATS_ENABLED_KEY to true. + dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "true"); + for (String parameter : slowPeersParameters) { + dn.reconfigureProperty(parameter, "123"); + } + assertEquals(123, dn.getPeerMetrics().getMinOutlierDetectionNodes()); + assertEquals(123, dn.getPeerMetrics().getLowThresholdMs()); + assertEquals(123, dn.getPeerMetrics().getMinOutlierDetectionSamples()); + + // Revert to default and verify. + dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, null); + assertEquals(String.format("expect %s is not configured", + DFS_DATANODE_PEER_STATS_ENABLED_KEY), null, + dn.getConf().get(DFS_DATANODE_PEER_STATS_ENABLED_KEY)); + + // Reset DFS_DATANODE_PEER_STATS_ENABLED_KEY to true. + dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "true"); + + for (String parameter : slowPeersParameters) { + dn.reconfigureProperty(parameter, null); + } + assertEquals(String.format("expect %s is not configured", + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY), null, + dn.getConf().get(DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY)); + assertEquals(String.format("expect %s is not configured", + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY), null, + dn.getConf().get(DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY)); + assertEquals(String.format("expect %s is not configured", + DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY), null, + dn.getConf().get(DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY)); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java index 19d834cbe5557..19863de09d6c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java @@ -338,7 +338,7 @@ public void testDataNodeGetReconfigurableProperties() throws IOException { final List outs = Lists.newArrayList(); final List errs = Lists.newArrayList(); getReconfigurableProperties("datanode", address, outs, errs); - assertEquals(8, outs.size()); + assertEquals(12, outs.size()); assertEquals(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, outs.get(1)); } From edbee4bd469fcd741376783236d828b70a3f5051 Mon Sep 17 00:00:00 2001 From: tom lee Date: Thu, 27 Jan 2022 09:47:41 +0800 Subject: [PATCH 2/4] use LambdaTestUtils --- .../datanode/TestDataNodeReconfiguration.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java index 8cea9e7632027..7b814a8e018c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java @@ -51,6 +51,8 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -476,8 +478,7 @@ public void testCacheReportReconfiguration() } @Test - public void testSlowPeerParameters() - throws ReconfigurationException { + public void testSlowPeerParameters() throws Exception { String[] slowPeersParameters = { DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, @@ -487,12 +488,9 @@ public void testSlowPeerParameters() DataNode dn = cluster.getDataNodes().get(i); // Try invalid values. - try { - dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "text"); - } catch (ReconfigurationException expected) { - assertEquals("Could not change property dfs.datanode.peer.stats.enabled " + - "from 'true' to 'text'", expected.getMessage()); - } + LambdaTestUtils.intercept(ReconfigurationException.class, + "Could not change property dfs.datanode.peer.stats.enabled from 'true' to 'text'", + () -> dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, "text")); for (String parameter : slowPeersParameters) { try { From 5332ff35509109879ad95d5529e555cc9e35f588 Mon Sep 17 00:00:00 2001 From: tom lee Date: Thu, 27 Jan 2022 10:10:23 +0800 Subject: [PATCH 3/4] remove unused import --- .../hdfs/server/datanode/TestDataNodeReconfiguration.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java index 7b814a8e018c1..4dcc68a181344 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java @@ -34,7 +34,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -51,7 +50,6 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.test.LambdaTestUtils; import org.junit.After; import org.junit.Assert; From 1b9155e5cb920beab40e352a0b00dbd8b5178af9 Mon Sep 17 00:00:00 2001 From: tom lee Date: Mon, 14 Feb 2022 10:46:39 +0800 Subject: [PATCH 4/4] update slowNodeDetector --- .../datanode/metrics/DataNodePeerMetrics.java | 15 +++++++++++--- .../datanode/metrics/OutlierDetector.java | 20 +++++++++++++++++-- .../datanode/TestDataNodeReconfiguration.java | 10 ++++++++++ 3 files changed, 40 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java index d493d0140ad05..f62a7b504a1ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodePeerMetrics.java @@ -21,8 +21,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.metrics2.MetricsJsonBuilder; import org.apache.hadoop.metrics2.lib.MutableRollingAverages; import org.apache.hadoop.util.Preconditions; @@ -32,9 +32,11 @@ import java.util.Map; import java.util.concurrent.ThreadLocalRandom; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; /** @@ -77,10 +79,10 @@ public DataNodePeerMetrics(final String name, Configuration conf) { DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_DEFAULT); lowThresholdMs = conf.getLong(DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY, - DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT); + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT); minOutlierDetectionNodes = conf.getLong(DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY, - DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT); + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT); this.slowNodeDetector = new OutlierDetector(minOutlierDetectionNodes, lowThresholdMs); sendPacketDownstreamRollingAverages = new MutableRollingAverages("Time"); @@ -158,6 +160,7 @@ public void setMinOutlierDetectionNodes(long minNodes) { Preconditions.checkArgument(minNodes > 0, DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY + " should be larger than 0"); minOutlierDetectionNodes = minNodes; + this.slowNodeDetector.setMinNumResources(minNodes); } public long getMinOutlierDetectionNodes() { @@ -168,6 +171,7 @@ public void setLowThresholdMs(long thresholdMs) { Preconditions.checkArgument(thresholdMs > 0, DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY + " should be larger than 0"); lowThresholdMs = thresholdMs; + this.slowNodeDetector.setLowThresholdMs(thresholdMs); } public long getLowThresholdMs() { @@ -180,4 +184,9 @@ public void setMinOutlierDetectionSamples(long minSamples) { " should be larger than 0"); minOutlierDetectionSamples = minSamples; } + + @VisibleForTesting + public OutlierDetector getSlowNodeDetector() { + return this.slowNodeDetector; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java index 1e20e38fdc7be..39feca03d665e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/metrics/OutlierDetector.java @@ -60,7 +60,7 @@ public class OutlierDetector { /** * Minimum number of resources to run outlier detection. */ - private final long minNumResources; + private volatile long minNumResources; /** * The multiplier is from Leys, C. et al. @@ -70,7 +70,7 @@ public class OutlierDetector { /** * Threshold in milliseconds below which a node/ disk is definitely not slow. */ - private final long lowThresholdMs; + private volatile long lowThresholdMs; /** * Deviation multiplier. A sample is considered to be an outlier if it @@ -180,4 +180,20 @@ public static Double computeMedian(List sortedValues) { } return median; } + + public void setMinNumResources(long minNodes) { + minNumResources = minNodes; + } + + public long getMinOutlierDetectionNodes() { + return minNumResources; + } + + public void setLowThresholdMs(long thresholdMs) { + lowThresholdMs = thresholdMs; + } + + public long getLowThresholdMs() { + return lowThresholdMs; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java index 4dcc68a181344..2150ea0561ce6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeReconfiguration.java @@ -28,9 +28,11 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MAX_RECEIVER_THREADS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_PEER_STATS_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_KEY; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; @@ -520,6 +522,10 @@ public void testSlowPeerParameters() throws Exception { assertEquals(123, dn.getPeerMetrics().getMinOutlierDetectionNodes()); assertEquals(123, dn.getPeerMetrics().getLowThresholdMs()); assertEquals(123, dn.getPeerMetrics().getMinOutlierDetectionSamples()); + assertEquals(123, + dn.getPeerMetrics().getSlowNodeDetector().getMinOutlierDetectionNodes()); + assertEquals(123, + dn.getPeerMetrics().getSlowNodeDetector().getLowThresholdMs()); // Revert to default and verify. dn.reconfigureProperty(DFS_DATANODE_PEER_STATS_ENABLED_KEY, null); @@ -542,6 +548,10 @@ public void testSlowPeerParameters() throws Exception { assertEquals(String.format("expect %s is not configured", DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY), null, dn.getConf().get(DFS_DATANODE_PEER_METRICS_MIN_OUTLIER_DETECTION_SAMPLES_KEY)); + assertEquals(dn.getPeerMetrics().getSlowNodeDetector().getMinOutlierDetectionNodes(), + DFS_DATANODE_MIN_OUTLIER_DETECTION_NODES_DEFAULT); + assertEquals(dn.getPeerMetrics().getSlowNodeDetector().getLowThresholdMs(), + DFS_DATANODE_SLOWPEER_LOW_THRESHOLD_MS_DEFAULT); } } }