8000 lower_half/mmap64.c: mmaps[]: add sentinel by gc00 · Pull Request #351 · mpickpt/mana · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

lower_half/mmap64.c: mmaps[]: add sentinel #351

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 37 additions & 34 deletions mpi-proxy-split/lower-half/mmap64.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,6 @@
// memory regions using mmaps[] _and_ shms[].
// FIXME: We do not catch any changes in /proc/PID/maps due to ioctl.

// FIXME: When we know this works, remove this preprocessor variable and make
// this permanent. Right now, this causes us to reserve the lh_mem_range
// in two places (during launch and during restart). We should refactor
// to do this in only one place.
#define RESERVE_LH_MEM_RANGE

#define HAS_MAP_FIXED_NOREPLACE LINUX_VERSION_CODE >= KERNEL_VERSION(4, 17, 0)

#ifdef __NR_mmap2
Expand Down Expand Up @@ -100,7 +94,8 @@
// 2. Make the size of list dynamic

// Number of valid objects in the 'mmaps' list below
int numRegions = 0;
// For i >= numMmapRegions, mmaps[i].addr == MAP_FAILED.
int numMmapRegions = 0;

#ifdef LIBMMAP_SO
// Lower-half memory range to use.
Expand All @@ -119,7 +114,7 @@ MmapInfo_t*
getMmappedList(int **num)
{
if (!num) return NULL;
*num = &numRegions;
*num = &numMmapRegions;
return mmaps;
}

Expand All @@ -136,27 +131,28 @@ void
resetMmappedList()
{
// Alternatively, just do: memset((void *)mmaps, 0, sizeof(mmaps));
for (int i = 0; i < numRegions; i++) {
for (int i = 0; i < numMmapRegions; i++) {
memset(&mmaps[i], 0, sizeof(mmaps[i]));
}
numRegions = 0;
numMmapRegions = 0;
// All mmaps[] now unused. Mark all mmaps with MAP_FAILED sentinel, for GDB
for (int i = 0; i < MAX_TRACK; i++) {
mmaps[i].addr = MAP_FAILED;
}

#ifdef RESERVE_LH_MEM_RANGE
// We will reserve the lh_memRange memory, so no one can steal it from us.
/**************************************************************************
* Next, We reserve the lh_memRange memory, so no one can steal it from us.
* The upper half (../mtcp_split_process.c) sets lh_memRange.start to
* 0x2aab00000000, and we assume that no one else uses that address range.
* NPTE: We reserve the lh_memRange memory in two places: launch and restart.
**************************************************************************/
size_t length = lh_memRange.end - lh_memRange.start;
#if HAS_MAP_FIXED_NOREPLACE
// Use inline syscall here, to avoid our __mmap64 wrapper.
// FIXME: Keep only the "dev/zero" '#else' when it's been tested.
# if 0
void *rc = LH_MMAP_CALL(lh_memRange.start, length,
PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED_NOREPLACE, -1, 0);
# else
int fd = open("/dev/zero", O_RDONLY);
void *rc = LH_MMAP_CALL(lh_memRange.start, length,
PROT_NONE,
MAP_SHARED|MAP_ANONYMOUS|MAP_FIXED_NOREPLACE, fd, 0);
# endif
if (rc == MAP_FAILED) {
char msg[] = "*** Panic: MANA lower half: can't reserve lh_memRange"
" using MAP_FIXED_NOREPLACE\n";
Expand All @@ -181,13 +177,12 @@ resetMmappedList()
// only later, when mmap64 takes a piece.of it.
// memset(lh_memRange.start, CANARY_BYTE, length);
mprotect(lh_memRange.start, length, PROT_NONE);
#endif
}

int
getMmapIdx(const void *addr)
{
for (int i = 0; i < numRegions; i++) {
for (int i = 0; i < numMmapRegions; i++) {
if (mmaps[i].addr == addr) {
return i;
}
Expand Down Expand Up @@ -227,7 +222,7 @@ getNextAddr(size_t len)
static int
extendExistingMmap(const void *addr)
{
for (int i = 0; i < numRegions; i++) {
for (int i = 0; i < numMmapRegions; i++) {
if (addr >= mmaps[i].addr && addr <= mmaps[i].addr + mmaps[i].len) {
return i;
}
Expand Down Expand Up @@ -401,28 +396,36 @@ __mmap64 (void *addr, size_t len, int prot, int flags, int fd, __off_t offset)
mmaps[idx].unmapped = 0;
mmaps[idx].dontuse = 0;
} else {
// This tries to find an earlier unmapped region, and re-use it.
int idx2 = extendExistingMmap(ret);
if (idx2 != -1) {
size_t length = ROUND_UP(len) + ((char*)ret - (char*)mmaps[idx2].addr);
mmaps[idx2].len = length > mmaps[idx2].len ? length : mmaps[idx2].len;
mmaps[idx2].unmapped = 0;
idx = idx2;
} else {
mmaps[numRegions].addr = ret;
mmaps[numRegions].len = ROUND_UP(len);
mmaps[numRegions].unmapped = 0;
idx = numRegions;
numRegions = (numRegions + 1) % MAX_TRACK;
// FIXME: A better fix, below, is to set nextFreeAddr back to the start,
// and then to test the size of any gap. That can be done
// by checking each known region. Hopefully, there are not many.
// Or at least, we can re-use any unmapped regions.
if (numRegions == 0) {
} else { // Else use a new region at end.
mmaps[numMmapRegions].addr = ret;
mmaps[numMmapRegions].len = ROUND_UP(len);
mmaps[numMmapRegions].unmapped = 0;
idx = numMmapRegions;
numMmapRegions = numMmapRegions + 1;
// FIXME: Since lh_memRange is located at an address range that doesn't
// conflict with other addresses, a better fix would be to use
// LH_MMAP_CALL(lh_memRange.end, additional_length, PROT_NONE,
// MAP_SHARED|MAP_ANONYMOUS|MAP_FIXED_NOREPLACE, fd, 0)
// for fd pointing to /dev/zero, and so to contiguously extend
// our reserved mmaps[] region, and then adjust MAX_TRACK.
if (numMmapRegions >= MAX_TRACK - 2) { // Keep a MAP_FAILED sentinel
char msg[] = "*** Panic: MANA lower half: no more space for mmap.\n";
write(2, msg, sizeof(msg)); assert(numRegions > 0);
write(2, msg, sizeof(msg));
}
assert(numMmapRegions < MAX_TRACK - 2); // MAX_TRACK-2 for guard page
}
}
if (mmaps[idx+1].addr != MAP_FAILED) {
// Create an obvious sentinel for ease of debugging in GDB
mmaps[idx+1].addr = mmaps[idx+2].addr = MAP_FAILED;
}
if (extraGuardPage) {
mmaps[idx].guard = 1;
void *lastPage = (char*)ret + ROUND_UP(totalLen) - PAGE_SIZE;
Expand Down
5 changes: 2 additions & 3 deletions mpi-proxy-split/lower-half/mmap_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,7 @@ static uint64_t page_unit;
/*
* Note that many of these functions/variables have a corresponding
* function/variable in the upper half. The correspondences are:
* FIXME: rename numRegions to numMmapRegions
* int numRegions -> int *g_numMmaps
* int numMmapRegions -> int *g_numMmaps
* MmapInfo_t mmaps[MAX_TRACK] -> MmapInfo_t *g_list
* FIXME: Multiple changes needed; Also rename it to: lh_core_regions_list
* LhCoreRegions_t lh_regions_list[MAX_LH_REGIONS]
Expand Down Expand Up @@ -235,7 +234,7 @@ void* getNextAddr(size_t );
// TODO:
// 1. Make the size of list dynamic
#define MAX_TRACK 1000
extern int numRegions;
extern int numMmapRegions;
extern MmapInfo_t mmaps[MAX_TRACK];
extern void *nextFreeAddr;

Expand Down
2 changes: 1 addition & 1 deletion mpi-proxy-split/lower-half/munmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ alignedWithLastPage(const void *haystackStart,
static int
getOverlappingRegion(void *addr, size_t len)
{
for (int i = 0; i < numRegions; i++) {
for (int i = 0; i < numMmapRegions; i++) {
void *rstart = mmaps[i].addr;
void *rend = (char*)mmaps[i].addr + mmaps[i].len;
void *ustart = addr;
Expand Down
14 changes: 10 additions & 4 deletions mpi-proxy-split/lower-half/shmat.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,16 @@ __wrap_shmat(int shmid, const void *shmaddr, int shmflg)
mmaps[idx].len = len;
mmaps[idx].unmapped = 0;
} else {
mmaps[numRegions].addr = ret;
mmaps[numRegions].len = len;
mmaps[numRegions].unmapped = 0;
numRegions = (numRegions + 1) % MAX_TRACK;
mmaps[numMmapRegions].addr = ret;
mmaps[numMmapRegions].len = len;
mmaps[numMmapRegions].unmapped = 0;
numMmapRegions = numMmapRegions + 1;
// FIXME: shmat() is not creating a guard page, unlike mmap().
if (numMmapRegions >= MAX_TRACK - 2) { // Keep MAP_FAILED sentinel
char msg[] = "*** Panic: MANA lower half: no more space for mmap.\n";
write(2, msg, sizeof(msg));
}
assert(numMmapRegions < MAX_TRACK - 2); //MAX_TRACK-2 in case guard page
}
}
return ret;
Expand Down
9 changes: 9 additions & 0 deletions mpi-proxy-split/mpi_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,7 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data)
}

case DMTCP_EVENT_PRESUSPEND: {
// User threads are not yet suspended.
mana_state = CKPT_COLLECTIVE;
// preSuspendBarrier() will send coord response and get worker state.
// FIXME: See commant at: dmtcpplugin.cpp:'case DMTCP_EVENT_PRESUSPEND'
Expand All @@ -1110,6 +1111,7 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data)
}

case DMTCP_EVENT_PRECHECKPOINT: {
// User threads are now suspended.
recordMpiInitMaps();
recordOpenFds();
dmtcp_local_barrier("MPI:GetLocalLhMmapList");
Expand All @@ -1123,6 +1125,13 @@ mpi_plugin_event_hook(DmtcpEvent_t event, DmtcpEventData_t *data)
registerLocalSendsAndRecvs(); // p2p_drain_send_recv.cpp
dmtcp_global_barrier("MPI:Drain-Send-Recv");
drainSendRecv(); // p2p_drain_send_recv.cpp
// Pointers to lh for g_list and g_numMmaps were set by getLhMmapList()
JASSERT(g_list[*g_numMmaps-1].addr != MAP_FAILED &&
( g_list[*g_numMmaps].addr == MAP_FAILED /* sentinel */ ||
( g_list[*g_numMmaps].guard == 1 &&
g_list[*g_numMmaps].addr == MAP_FAILED )))
(*g_numMmaps) (g_list[*g_numMmaps-1].addr)
(g_list[*g_numMmaps].addr) (g_list[*g_numMmaps].addr);
computeUnionOfCkptImageAddresses();
dmtcp_global_barrier("MPI:save-mana-header-and-mpi-files");
const char *file = get_mana_header_file_name();
Expand Down
0