From f8c041cca54539080f49ce0fdf2980626faa8ec5 Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Thu, 28 Feb 2019 15:33:48 -0800 Subject: [PATCH 1/2] HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics. --- .../apache/hadoop/utils/db/DBCheckpoint.java | 5 + .../hadoop/utils/db/RDBCheckpointManager.java | 23 ++- .../ozone/om/TestOMDbSnapshotServlet.java | 173 ++++++++++++++++++ .../ozone/om/OMDBCheckpointServlet.java | 28 ++- .../org/apache/hadoop/ozone/om/OMMetrics.java | 33 ++++ 5 files changed, 258 insertions(+), 4 deletions(-) create mode 100644 hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java index e25ac92c94d12..a3b197a55f178 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/DBCheckpoint.java @@ -42,6 +42,11 @@ public interface DBCheckpoint { */ long getLatestSequenceNumber(); + /** + * Time taken in milliseconds for the checkpoint to be created. + */ + long checkpointCreationTimeTaken(); + /** * Destroy the contents of the specified checkpoint to ensure * proper cleanup of the footprint on disk. 
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java index d44ebafb7fbeb..ce716c3c11bad 100644 --- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/utils/db/RDBCheckpointManager.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.Duration; +import java.time.Instant; import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; @@ -41,7 +43,6 @@ public class RDBCheckpointManager { public static final String RDB_CHECKPOINT_DIR_PREFIX = "rdb_checkpoint_"; private static final Logger LOG = LoggerFactory.getLogger(RDBCheckpointManager.class); - public static final String JAVA_TMP_DIR = "java.io.tmpdir"; private String checkpointNamePrefix = ""; public RDBCheckpointManager(RocksDB rocksDB) { @@ -79,12 +80,19 @@ public RocksDBCheckpoint createCheckpoint(String parentDir) { checkpointDir += "_" + RDB_CHECKPOINT_DIR_PREFIX + currentTime; Path checkpointPath = Paths.get(parentDir, checkpointDir); + Instant start = Instant.now(); checkpoint.createCheckpoint(checkpointPath.toString()); + Instant end = Instant.now(); + + long duration = Duration.between(start, end).toMillis(); + LOG.debug("Created checkpoint at " + checkpointPath.toString() + " in " + + duration + " milliseconds"); return new RocksDBCheckpoint( checkpointPath, currentTime, - db.getLatestSequenceNumber()); //Best guesstimate here. Not accurate. + db.getLatestSequenceNumber(), //Best guesstimate here. Not accurate. 
+ duration); } catch (RocksDBException e) { LOG.error("Unable to create RocksDB Snapshot.", e); @@ -97,13 +105,16 @@ static class RocksDBCheckpoint implements DBCheckpoint { private Path checkpointLocation; private long checkpointTimestamp; private long latestSequenceNumber; + private long checkpointCreationTimeTaken; RocksDBCheckpoint(Path checkpointLocation, long snapshotTimestamp, - long latestSequenceNumber) { + long latestSequenceNumber, + long checkpointCreationTimeTaken) { this.checkpointLocation = checkpointLocation; this.checkpointTimestamp = snapshotTimestamp; this.latestSequenceNumber = latestSequenceNumber; + this.checkpointCreationTimeTaken = checkpointCreationTimeTaken; } @Override @@ -121,8 +132,14 @@ public long getLatestSequenceNumber() { return this.latestSequenceNumber; } + @Override + public long checkpointCreationTimeTaken() { + return checkpointCreationTimeTaken; + } + @Override public void cleanupCheckpoint() throws IOException { + LOG.debug("Cleaning up checkpoint at " + checkpointLocation.toString()); FileUtils.deleteDirectory(checkpointLocation.toFile()); } } diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java new file mode 100644 index 0000000000000..26617d7e71e44 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ozone.om; + +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_ACL_ENABLED; +import static org.apache.hadoop.ozone.OzoneConfigKeys. + OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.util.UUID; + +import javax.servlet.ServletContext; +import javax.servlet.ServletException; +import javax.servlet.ServletOutputStream; +import javax.servlet.WriteListener; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.hadoop.ozone.OzoneConsts; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.mockito.Matchers; + +import static org.apache.hadoop.ozone.OzoneConsts. + OZONE_DB_CHECKPOINT_REQUEST_FLUSH; +import static org.mockito.Mockito.doCallRealMethod; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestOMDbSnapshotServlet { + private MiniOzoneCluster cluster = null; + private OMMetrics omMetrics; + private OzoneConfiguration conf; + private String clusterId; + private String scmId; + private String omId; + + @Rule + public Timeout timeout = new Timeout(60000); + + /** + * Create a MiniOzoneCluster for testing. + *

+ * Ozone is made active by setting OZONE_ENABLED = true + * + * @throws IOException + */ + @Before + public void init() throws Exception { + conf = new OzoneConfiguration(); + clusterId = UUID.randomUUID().toString(); + scmId = UUID.randomUUID().toString(); + omId = UUID.randomUUID().toString(); + conf.setBoolean(OZONE_ACL_ENABLED, true); + conf.setInt(OZONE_OPEN_KEY_EXPIRE_THRESHOLD_SECONDS, 2); + cluster = MiniOzoneCluster.newBuilder(conf) + .setClusterId(clusterId) + .setScmId(scmId) + .setOmId(omId) + .build(); + cluster.waitForClusterToBeReady(); + omMetrics = cluster.getOzoneManager().getMetrics(); + } + + /** + * Shutdown MiniOzoneCluster. + */ + @After + public void shutdown() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testDoGet() throws ServletException, IOException { + + File tempFile = null; + try { + OMDbSnapshotServlet omDbSnapshotServletMock = + mock(OMDbSnapshotServlet.class); + + doCallRealMethod().when(omDbSnapshotServletMock).init(); + + HttpServletRequest requestMock = mock(HttpServletRequest.class); + HttpServletResponse responseMock = mock(HttpServletResponse.class); + + ServletContext servletContextMock = mock(ServletContext.class); + when(omDbSnapshotServletMock.getServletContext()) + .thenReturn(servletContextMock); + + when(servletContextMock.getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE)) + .thenReturn(cluster.getOzoneManager()); + when(requestMock.getParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH)) + .thenReturn("true"); + doNothing().when(responseMock).setContentType("application/x-tgz"); + doNothing().when(responseMock).setHeader(Matchers.anyString(), + Matchers.anyString()); + + tempFile = File.createTempFile("testDoGet_" + System + .currentTimeMillis(),".tar.gz"); + + FileOutputStream fileOutputStream = new FileOutputStream(tempFile); + when(responseMock.getOutputStream()).thenReturn( + new ServletOutputStream() { + @Override + public boolean isReady() { + return true; + } + + @Override + public void 
setWriteListener(WriteListener writeListener) { + } + + @Override + public void write(int b) throws IOException { + fileOutputStream.write(b); + } + }); + + doCallRealMethod().when(omDbSnapshotServletMock).doGet(requestMock, + responseMock); + + omDbSnapshotServletMock.init(); + + Assert.assertTrue( + omMetrics.getLastCheckpointCreationTimeTaken() == 0); + Assert.assertTrue( + omMetrics.getLastCheckpointTarOperationTimeTaken() == 0); + Assert.assertTrue( + omMetrics.getLastCheckpointStreamingTimeTaken() == 0); + + omDbSnapshotServletMock.doGet(requestMock, responseMock); + + Assert.assertTrue(tempFile.length() > 0); + Assert.assertTrue( + omMetrics.getLastCheckpointCreationTimeTaken() > 0); + Assert.assertTrue( + omMetrics.getLastCheckpointTarOperationTimeTaken() > 0); + Assert.assertTrue( + omMetrics.getLastCheckpointStreamingTimeTaken() > 0); + } finally { + FileUtils.deleteQuietly(tempFile); + } + + } +} diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index edcb7efd2c62c..0b4c096b0d05a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -24,6 +24,8 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.time.Duration; +import java.time.Instant; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; @@ -33,6 +35,8 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.hdds.server.BaseHttpServer; +import org.apache.hadoop.hdds.server.PrometheusMetricsSink; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.hdfs.util.DataTransferThrottler; 
import org.apache.hadoop.io.IOUtils; @@ -53,6 +57,7 @@ public class OMDBCheckpointServlet extends HttpServlet { private static final long serialVersionUID = 1L; private transient DBStore omDbStore; + private transient OMMetrics omMetrics; private transient DataTransferThrottler throttler = null; @Override @@ -67,6 +72,8 @@ public void init() throws ServletException { } omDbStore = om.getMetadataManager().getStore(); + omMetrics = om.getMetrics(); + OzoneConfiguration configuration = om.getConfiguration(); long transferBandwidth = configuration.getLongBytes( OMConfigKeys.OZONE_DB_CHECKPOINT_TRANSFER_RATE_KEY, @@ -112,19 +119,38 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) { response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); return; } + omMetrics.setLastCheckpointCreationTimeTaken( + checkpoint.checkpointCreationTimeTaken()); + + Instant start = Instant.now(); checkPointTarFile = OmUtils.createTarFile( checkpoint.getCheckpointLocation()); - LOG.info("Tar location = " + checkPointTarFile.getAbsolutePath()); + Instant end = Instant.now(); + + long duration = Duration.between(start, end).toMillis(); + LOG.debug("Time taken to archive the checkpoint : " + duration + + " milliseconds"); + LOG.info("Checkpoint Tar location = " + + checkPointTarFile.getAbsolutePath()); + omMetrics.setLastCheckpointTarOperationTimeTaken(duration); + response.setContentType("application/x-tgz"); response.setHeader("Content-Disposition", "attachment; filename=\"" + checkPointTarFile.getName() + "\""); checkpointFileInputStream = new FileInputStream(checkPointTarFile); + start = Instant.now(); TransferFsImage.copyFileToStream(response.getOutputStream(), checkPointTarFile, checkpointFileInputStream, throttler); + end = Instant.now(); + + duration = Duration.between(start, end).toMillis(); + LOG.debug("Time taken to write the checkpoint to response output " + + "stream: " + duration + " milliseconds"); + 
omMetrics.setLastCheckpointStreamingTimeTaken(duration); checkpoint.cleanupCheckpoint(); } catch (IOException e) { diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index 2ef0aca56e8b8..9bdd561d8f883 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -25,6 +25,8 @@ import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeFloat; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; /** * This class is for maintaining Ozone Manager statistics. @@ -105,6 +107,10 @@ public class OMMetrics { // few minutes before restart may not be included in this count. private @Metric MutableCounterLong numKeys; + // Metrics to track checkpointing statistics from last run. 
+ private @Metric MutableGaugeLong lastCheckpointCreationTimeTaken; + private @Metric MutableGaugeLong lastCheckpointTarOperationTimeTaken; + private @Metric MutableGaugeLong lastCheckpointStreamingTimeTaken; public OMMetrics() { } @@ -390,6 +396,18 @@ public void incNumGetServiceListFails() { numGetServiceListFails.incr(); } + public void setLastCheckpointCreationTimeTaken(long val) { + this.lastCheckpointCreationTimeTaken.set(val); + } + + public void setLastCheckpointTarOperationTimeTaken(long val) { + this.lastCheckpointTarOperationTimeTaken.set(val); + } + + public void setLastCheckpointStreamingTimeTaken(long val) { + this.lastCheckpointStreamingTimeTaken.set(val); + } + @VisibleForTesting public long getNumVolumeCreates() { return numVolumeCreates.value(); @@ -606,6 +624,21 @@ public long getNumAbortMultipartUploadFails() { return numAbortMultipartUploadFails.value(); } + @VisibleForTesting + public long getLastCheckpointCreationTimeTaken() { + return lastCheckpointCreationTimeTaken.value(); + } + + @VisibleForTesting + public long getLastCheckpointTarOperationTimeTaken() { + return lastCheckpointTarOperationTimeTaken.value(); + } + + @VisibleForTesting + public long getLastCheckpointStreamingTimeTaken() { + return lastCheckpointStreamingTimeTaken.value(); + } + public void unRegister() { MetricsSystem ms = DefaultMetricsSystem.instance(); ms.unregisterSource(SOURCE_NAME); From 95a67c551d53f8e6e112e0e26443ca8a70a75857 Mon Sep 17 00:00:00 2001 From: Aravindan Vijayan Date: Thu, 28 Feb 2019 18:11:58 -0800 Subject: [PATCH 2/2] HDDS-1136 : Add metric counters to capture the RocksDB checkpointing statistics.(2) --- ...rvlet.java => TestOMDbCheckpointServlet.java} | 16 ++++++++-------- .../hadoop/ozone/om/OMDBCheckpointServlet.java | 2 -- 2 files changed, 8 insertions(+), 10 deletions(-) rename hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/{TestOMDbSnapshotServlet.java => TestOMDbCheckpointServlet.java} (92%) diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java similarity index 92% rename from hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java rename to hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java index 26617d7e71e44..f63c07914aeee 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbSnapshotServlet.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMDbCheckpointServlet.java @@ -53,7 +53,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -public class TestOMDbSnapshotServlet { +public class TestOMDbCheckpointServlet { private MiniOzoneCluster cluster = null; private OMMetrics omMetrics; private OzoneConfiguration conf; @@ -103,16 +103,16 @@ public void testDoGet() throws ServletException, IOException { File tempFile = null; try { - OMDbSnapshotServlet omDbSnapshotServletMock = - mock(OMDbSnapshotServlet.class); + OMDBCheckpointServlet omDbCheckpointServletMock = + mock(OMDBCheckpointServlet.class); - doCallRealMethod().when(omDbSnapshotServletMock).init(); + doCallRealMethod().when(omDbCheckpointServletMock).init(); HttpServletRequest requestMock = mock(HttpServletRequest.class); HttpServletResponse responseMock = mock(HttpServletResponse.class); ServletContext servletContextMock = mock(ServletContext.class); - when(omDbSnapshotServletMock.getServletContext()) + when(omDbCheckpointServletMock.getServletContext()) .thenReturn(servletContextMock); when(servletContextMock.getAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE)) @@ -144,10 +144,10 @@ public void write(int b) throws IOException { } }); - doCallRealMethod().when(omDbSnapshotServletMock).doGet(requestMock, + 
doCallRealMethod().when(omDbCheckpointServletMock).doGet(requestMock, responseMock); - omDbSnapshotServletMock.init(); + omDbCheckpointServletMock.init(); Assert.assertTrue( omMetrics.getLastCheckpointCreationTimeTaken() == 0); @@ -156,7 +156,7 @@ public void write(int b) throws IOException { Assert.assertTrue( omMetrics.getLastCheckpointStreamingTimeTaken() == 0); - omDbSnapshotServletMock.doGet(requestMock, responseMock); + omDbCheckpointServletMock.doGet(requestMock, responseMock); Assert.assertTrue(tempFile.length() > 0); Assert.assertTrue( diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 0b4c096b0d05a..96acfb3f06df2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -35,8 +35,6 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; -import org.apache.hadoop.hdds.server.BaseHttpServer; -import org.apache.hadoop.hdds.server.PrometheusMetricsSink; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.io.IOUtils;