-
Notifications
You must be signed in to change notification settings - Fork 24
Several small improvements #100
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master-old
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
mana_p2p_update_logs | ||
p2p-deterministic.h |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -97,8 +97,11 @@ recvMsgIntoInternalBuffer(MPI_Status status, MPI_Comm comm) | |
MPI_Type_size(MPI_BYTE, &size); | ||
JASSERT(size == 1); | ||
void *buf = JALLOC_HELPER_MALLOC(count); | ||
MPI_Recv(buf, count, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG, | ||
int rc = MPI_Recv(buf, count, MPI_BYTE, status.MPI_SOURCE, status.MPI_TAG, | ||
comm, MPI_STATUS_IGNORE); | ||
if (rc != MPI_SUCCESS) { | ||
fprintf(stderr, "MPI_Recv failed\n"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to use JTRACE here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, will update the code. |
||
} | ||
|
||
mpi_message_t *message = (mpi_message_t *)JALLOC_HELPER_MALLOC(sizeof(mpi_message_t)); | ||
message->buf = buf; | ||
|
@@ -116,7 +119,7 @@ recvMsgIntoInternalBuffer(MPI_Status status, MPI_Comm comm) | |
|
||
// Go through each MPI_Irecv in the g_async_calls map and try to complete | ||
// the MPI_Irecv before checkpointing. | ||
int | ||
static int | ||
completePendingIrecvs() | ||
{ | ||
int bytesReceived = 0; | ||
|
@@ -135,6 +138,7 @@ completePendingIrecvs() | |
call->comm); | ||
g_recvBytesByRank[worldRank] += call->count * size; | ||
bytesReceived += call->count * size; | ||
JALLOC_HELPER_FREE(call); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll have to check, but IIRC, the call object couldn't be freed here because it was being referenced in some other place later. So, I had decided to live with the memory leak. I'll have to go through the code again to make sure. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this case, shall we use a |
||
it = g_async_calls.erase(it); | ||
} else { | ||
/* We update the iterator even if the MPI_Test fails. | ||
|
@@ -162,7 +166,7 @@ completePendingIrecvs() | |
return bytesReceived; | ||
} | ||
|
||
int | ||
static int | ||
recvFromAllComms() | ||
{ | ||
int bytesReceived = 0; | ||
|
@@ -190,17 +194,16 @@ recvFromAllComms() | |
return bytesReceived; | ||
} | ||
|
||
void | ||
static void | ||
removePendingSendRequests() | ||
{ | ||
dmtcp::map<MPI_Request, mpi_async_call_t*>::iterator it; | ||
for (it = g_async_calls.begin(); it != g_async_calls.end();) { | ||
MPI_Request request = it->first; | ||
mpi_async_call_t *call = it->second; | ||
int flag = 0; | ||
if (call->type == ISEND_REQUEST) { | ||
UPDATE_REQUEST_MAP(request, MPI_REQUEST_NULL); | ||
// FIXME: We should free `call' to avoid memory leak | ||
JALLOC_HELPER_FREE(call); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above |
||
it = g_async_calls.erase(it); | ||
} else { | ||
it++; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -247,12 +247,10 @@ read_lh_proxy_bits(pid_t childpid) | |
// NOTE: This requires same privilege as ptrace_attach (valid for child). | ||
// Anecdotally, in containers, we've seen a case where this errors out | ||
// with ESRCH (no such proc.); it may need CAP_SYS_PTRACE privilege?? | ||
for (int i = 0; i < IOV_SZ; i++) { | ||
JTRACE("Reading segment from lh_proxy") | ||
(remote_iov[i].iov_base)(remote_iov[i].iov_len); | ||
ret = process_vm_readv(childpid, remote_iov + i, 1, remote_iov + i, 1, 0); | ||
JASSERT(ret != -1)(JASSERT_ERRNO).Text("Error reading data from lh_proxy"); | ||
} | ||
JTRACE("Reading segment from lh_proxy") | ||
(remote_iov[0].iov_base)(remote_iov[0].iov_len); | ||
ret = process_vm_readv(childpid, remote_iov, IOV_SZ, remote_iov, IOV_SZ, 0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rohgarg @gc00 @xuyao0127 Not sure if I understand it correctly, I tested it and works fine. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the intent here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it would be better to take the advantage of |
||
JASSERT(ret != -1)(JASSERT_ERRNO).Text("Error reading data from lh_proxy"); | ||
|
||
// Can remove PROT_WRITE now that we've populated the text segment. | ||
ret = mprotect((void *)ROUND_DOWN(remote_iov[0].iov_base), | ||
|
@@ -333,7 +331,7 @@ startProxy() | |
// Read from stdout of lh_proxy full lh_info struct, including orig memRange. | ||
close(pipefd_out[1]); // close write end of pipe | ||
// FIXME: should be a readall. Check for return error code. | ||
if (read(pipefd_out[0], &lh_info, sizeof lh_info) < sizeof lh_info) { | ||
if (read(pipefd_out[0], &lh_info, sizeof lh_info) < (ssize_t) sizeof lh_info) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please do not commit the change o this line. The issue here is that |
||
JWARNING(false)(JASSERT_ERRNO) .Text("Read fewer bytes than expected"); | ||
break; | ||
} | ||
|
@@ -352,7 +350,6 @@ findLHMemRange(MemRange_t *lh_mem_range) | |
bool is_set = false; | ||
|
||
Area area; | ||
char prev_path_name[PATH_MAX]; | ||
char next_path_name[PATH_MAX]; | ||
uint64_t prev_addr_end; | ||
uint64_t next_addr_start; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we re-indent the arguments
comm, MPI_STATUS_ERROR
to be underneathbuf
on the previous line?