8000 Fixes for distributed and state tests by Shillaker · Pull Request #609 · faasm/faasm · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Fixes for distributed and state tests #609

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 15, 2022
2 changes: 1 addition & 1 deletion clients/cpp
54 changes: 23 additions & 31 deletions docs/source/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,55 +219,47 @@ interfering with the tests:
docker-compose down
```

Start the distributed tests server:
Make sure your local setup is built, along with the distributed tests:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slight rearrange of these distributed test docs which I found a little confusing before.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree! 🤣


```bash
./deploy/dist-test/dev_server.sh
```
# Enter CLI container
./bin/cli.sh faasm

Then you need to make sure all the functions are up to date:
# Build local environment
inv dev.tools

```bash
./deploy/dist-test/upload.sh
# Build dist tests
inv dev.cc dist_tests dev.cc dist_test_server
```

Build and run the tests:
Outside the container, start the distributed tests server, then upload all the
functions:

```bash
# Enter the Faasm CLI container
./bin/cli.sh faasm

# Build and run the tests
inv dev.cc dist_tests
dist_tests
```
# Start server
./deploy/dist-test/dev_server.sh

To rebuild the server (inside the Faasm CLI container):
# Upload everything
./deploy/dist-test/upload.sh

```bash
inv dev.cc dist_test_server
# Run the tests
./deploy/dist-test/run.sh
```

Then from outside the container, you can restart the server:
You can then rebuild and rerun from inside the container:

```bash
./deploy/dist-test/dev_server.sh restart
```
# Build tests and the server
inv dev.cc dist_tests dev.cc dist_test_server

### Replicating CI
# Run
dist_tests
```

To run the distributed tests as if in CI:
If changing the server, you need to restart from outside the container:

```bash
# Clear up
docker-compose down

# Set up
./deploy/dist-test/build.sh
./deploy/dist-test/upload.sh

# Run once through
./deploy/dist-test/run.sh
./deploy/dist-test/dev_server.sh restart
```

## Building outside of the container
Expand Down
2 changes: 2 additions & 0 deletions include/faaslet/Faaslet.h

Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ class Faaslet final : public faabric::scheduler::Executor

void setMemorySize(size_t newSize) override;

size_t getMaxMemorySize() override;
private:
std::string localResetSnapshotKey;

Expand Down
5 changes: 5 additions & 0 deletions src/faaslet/Faaslet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,11 @@ void Faaslet::setMemorySize(size_t newSize)
module->setMemorySize(newSize);
}

size_t Faaslet::getMaxMemorySize()
{
return MAX_WASM_MEM;
}

std::string Faaslet::getLocalResetSnapshotKey()
{
return localResetSnapshotKey;
Expand Down
28 changes: 26 additions & 2 deletions src/wavm/faasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,16 +654,40 @@ WAVM_DEFINE_INTRINSIC_FUNCTION(env,
I32 reduceOp,
int currentBatch)
{
// Here we have two scenarios, the second of which differs in behaviour when
// we're in single host mode:
//
// 1. We're being notified of a reduction variable in an *upcoming* batch of
// threads. This means the snapshot doesn't yet exist, and we don't know
// whether it will be in single host mode or not. Therefore, we always keep
// the merge region in a list.
//
// 2. We're being notified of a reduction variable in the *current* batch of
// threads. If this is in single host mode, we can ignore it, as there won't
// be any snapshotting at all, otherwise we add it to the snapshot.

bool isCurrentBatch = currentBatch == 1;
bool isSingleHost = ExecutorContext::get()->getBatchRequest()->singlehost();

// Here we can ignore if we're in the current batch, and it's in single host
// mode.
if (isCurrentBatch && isSingleHost) {
SPDLOG_DEBUG("S - sm_reduce - {} {} {} {} (ignored, single host)",
varPtr,
varType,
reduceOp,
currentBatch);
return;
}

SPDLOG_DEBUG(
"S - sm_reduce - {} {} {} {}", varPtr, varType, reduceOp, currentBatch);

auto dataType = extractSnapshotDataType(varType);
faabric::util::SnapshotMergeOperation mergeOp =
extractSnapshotMergeOp(reduceOp);

bool isCurrentBatch = currentBatch == 1;
faabric::Message* msg = &ExecutorContext::get()->getMsg();

SPDLOG_DEBUG("Registering reduction variable {}-{} for {} {}",
varPtr,
varPtr + dataType.first,
Expand Down
6 changes: 3 additions & 3 deletions src/wavm/mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ using namespace faabric::scheduler;
using namespace WAVM;

#define MPI_FUNC(str) \
SPDLOG_DEBUG("MPI-{} {}", executingContext.getRank(), str);
SPDLOG_TRACE("MPI-{} {}", executingContext.getRank(), str);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These were printing out so much logging information in the dist tests that it was difficult to debug.


#define MPI_FUNC_ARGS(formatStr, ...) \
SPDLOG_DEBUG("MPI-{} " formatStr, executingContext.getRank(), __VA_ARGS__);
SPDLOG_TRACE("MPI-{} " formatStr, executingContext.getRank(), __VA_ARGS__);

namespace wasm {
static thread_local faabric::scheduler::MpiContext executingContext;
Expand Down Expand Up @@ -413,7 +413,7 @@ WAVM_DEFINE_INTRINSIC_FUNCTION(env,
I32 datatype,
I32 countPtr)
{
SPDLOG_DEBUG("S - MPI_Get_count {} {} {}", statusPtr, datatype, countPtr);
SPDLOG_TRACE("S - MPI_Get_count {} {} {}", statusPtr, datatype, countPtr);

MPI_Status* status =
&Runtime::memoryRef<MPI_Status>(ctx->memory, statusPtr);
Expand Down
41 changes: 41 additions & 0 deletions tests/dist/state/test_state.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include <catch2/catch.hpp>

#include "fixtures.h"

#include <faabric/scheduler/Scheduler.h>
#include <faabric/util/logging.h>
#include <faabric/util/string_tools.h>

namespace tests {

TEST_CASE_METHOD(DistTestsFixture,
"Test running distributed Pi estimate",
"[scheduler]")
{
int nWorkers = 20;

// Make sure only half can be executed on this host
int nLocalSlots = 10;
faabric::HostResources res;
res.set_slots(nLocalSlots);
sch.setThisHostResources(res);

// Set up the message
std::shared_ptr<faabric::BatchExecuteRequest> req =
faabric::util::batchExecFactory("demo", "pi", 1);
faabric::Message& msg = req->mutable_messages()->at(0);
msg.set_inputdata(std::to_string(nWorkers));

// Call the functions
sch.callFunctions(req);

// Check it's successful
faabric::Message result =
sch.getFunctionResult(msg.id(), functionCallTimeout);
REQUIRE(result.returnvalue() == 0);

// Get the estimate (check one dp)
std::string outputData = msg.outputdata();
REQUIRE(faabric::util::startsWith(outputData, "Pi estimate: 3.1"));
}
}
14 changes: 12 additions & 2 deletions tests/dist/threads/test_openmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
#include "fixtures.h"

#include <faabric/scheduler/Scheduler.h>
#include <faabric/util/string_tools.h>

#define PI_FUNCTION "pi_faasm"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very minor, but it feels a bit odd that this is the only function with its name #define-ed and also that it follows a different format than MPI. I would have expected it to be called omp_pi.


namespace tests {
// 10/02/22 Broken by latest Faabric
TEST_CASE_METHOD(DistTestsFixture,
"Test OpenMP across hosts",
"[.][threads][openmp]")
"[threads][openmp]")
{
conf.overrideCpuCount = 6;

Expand All @@ -26,6 +28,8 @@ TEST_CASE_METHOD(DistTestsFixture,

SECTION("Repeated reduce") { function = "repeated_reduce"; }

SECTION("Pi estimation") { function = PI_FUNCTION; }

// Set up the message
std::shared_ptr<faabric::BatchExecuteRequest> req =
faabric::util::batchExecFactory("omp", function, 1);
Expand All @@ -48,5 +52,11 @@ TEST_CASE_METHOD(DistTestsFixture,
// Check other host is registered CEB7
std::set<std::string> expectedRegisteredHosts = { getDistTestWorkerIp() };
REQUIRE(sch.getFunctionRegisteredHosts(msg) == expectedRegisteredHosts);

// Check specific results
if (function == PI_FUNCTION) {
REQUIRE(
faabric::util::startsWith(result.outputdata(), "Pi estimate: 3.1"));
}
}
}
17 changes: 13 additions & 4 deletions tests/test/faaslet/test_mpi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <faabric/util/environment.h>
#include <faabric/util/func.h>
#include <faabric/util/logging.h>
#include <faabric/util/string_tools.h>

namespace tests {

Expand All @@ -16,12 +17,12 @@ class MPIFuncTestFixture
, public MpiBaseTestFixture
{
public:
void checkMpiFunc(const char* funcName)
faabric::Message checkMpiFunc(const char* funcName)
{
// Note: we don't `set_mpiworldsize` here, so all tests run with the
// default MPI world size (5). Some tests will fail if we change this.
faabric::Message msg = faabric::util::messageFactory("mpi", funcName);
execFuncWithPool(msg, true, 10000);
faabric::Message result = execFuncWithPool(msg, true, 10000);

// Check all other functions were successful
auto& sch = faabric::scheduler::getScheduler();
Expand All @@ -32,13 +33,14 @@ class MPIFuncTestFixture
continue;
}

const faabric::Message& result =
sch.getFunctionResult(messageId, 1);
faabric::Message result = sch.getFunctionResult(messageId, 1);

if (result.returnvalue() != 0) {
FAIL(fmt::format("Message ID {} failed", messageId));
}
}

return result;
}
};

Expand Down Expand Up @@ -132,4 +134,11 @@ TEST_CASE_METHOD(MPIFuncTestFixture, "Test MPI async", "[mpi]")
{
checkMpiFunc("mpi_isendrecv");
}

TEST_CASE_METHOD(MPIFuncTestFixture, "Test MPI Pi", "[mpi]")
{
faabric::Message result = checkMpiFunc("mpi_pi");
std::string output = result.outputdata();
REQUIRE(faabric::util::startsWith(output, "Pi estimate: 3.1"));
}
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noticed that this wasn't included in tests so I added it for good measure.

}
10 changes: 10 additions & 0 deletions tests/test/faaslet/test_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <faabric/runner/FaabricMain.h>
#include <faabric/util/bytes.h>
#include <faabric/util/environment.h>
#include <faabric/util/string_tools.h>
#include <faaslet/Faaslet.h>

using namespace faaslet;
Expand Down Expand Up @@ -122,4 +123,13 @@ TEST_CASE_METHOD(StateFuncTestFixture, "Test appended state", "[state]")
faabric::Message& call = req->mutable_messages()->at(0);
execFuncWithPool(call);
}

TEST_CASE_METHOD(StateFuncTestFixture, "Test Pi estimate", "[state]")
{
auto req = setUpContext("demo", "pi");
faabric::Message& call = req->mutable_messages()->at(0);
faabric::Message result = execFuncWithPool(call);
std::string output = result.outputdata();
REQUIRE(faabric::util::startsWith(output, "Pi estimate: 3.1"));
}
}
21 changes: 14 additions & 7 deletions tests/test/wasm/test_openmp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <faabric/util/config.h>
#include <faabric/util/environment.h>
#include <faabric/util/func.h>
#include <faabric/util/string_tools.h>

// Longer timeout to allow longer-running functions to finish even when doing
// trace-level logging
Expand All @@ -27,16 +28,19 @@ class OpenMPTestFixture

~OpenMPTestFixture() {}

void doOmpTestLocal(const std::string& function)
std::string doOmpTestLocal(const std::string& function)
{
faabric::Message msg = faabric::util::messageFactory("omp", function);
execFuncWithPool(msg, false, OMP_TEST_TIMEOUT_MS);
faabric::Message result =
execFuncWithPool(msg, false, OMP_TEST_TIMEOUT_MS);

return result.outputdata();
}
};

TEST_CASE_METHOD(OpenMPTestFixture,
"Test OpenMP static for scheduling",
"[.][wasm][openmp]")
"[wasm][openmp]")
{
doOmpTestLocal("for_static_schedule");
}
Expand Down Expand Up @@ -83,7 +87,7 @@ TEST_CASE_METHOD(OpenMPTestFixture, "Test OpenMP reduction", "[wasm][openmp]")

TEST_CASE_METHOD(OpenMPTestFixture,
"Test a mix of OpenMP constructs",
"[.][wasm][openmp]")
"[wasm][openmp]")
{
doOmpTestLocal("reduction_integral");
}
Expand All @@ -110,10 +114,13 @@ TEST_CASE_METHOD(OpenMPTestFixture,
}

TEST_CASE_METHOD(OpenMPTestFixture,
"Test OpenMP Pi calculation",
"Test OpenMP Pi calculation using libfaasm",
"[wasm][openmp]")
{
doOmpTestLocal("pi_faasm");
std::string output = doOmpTestLocal("pi_faasm");

// Just check Pi to one dp
REQUIRE(faabric::util::startsWith(output, "Pi estimate: 3.1"));
}

TEST_CASE_METHOD(OpenMPTestFixture,
Expand Down Expand Up @@ -149,7 +156,7 @@ TEST_CASE_METHOD(OpenMPTestFixture,
doOmpTestLocal("repeated_reduce");
}

TEST_CASE_METHOD(OpenMPTestFixture, "Test OpenMP atomic", "[.][wasm][openmp]")
TEST_CASE_METHOD(OpenMPTestFixture, "Test OpenMP atomic", "[wasm][openmp]")
{
doOmpTestLocal("simple_atomic");
}
Expand Down
Loading
0