From 2d408b20273f9ac5d7c5142c4269cf504ad5aca0 Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Thu, 1 Sep 2022 12:58:47 -0400 Subject: [PATCH 1/2] Treat an ERR_MEM on send as non-fatal on LwIP. That can be caused by a transient lack of TX buffers. We should just try again via MRP. Fixes https://github.com/project-chip/connectedhomeip/issues/21671 --- src/messaging/ExchangeMessageDispatch.cpp | 14 +----------- src/messaging/ReliableMessageMgr.cpp | 27 +++++++++++++++++++++++ src/messaging/ReliableMessageMgr.h | 7 ++++++ 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/src/messaging/ExchangeMessageDispatch.cpp b/src/messaging/ExchangeMessageDispatch.cpp index 4bd14e89da600f..5d39fda0dbdc31 100644 --- a/src/messaging/ExchangeMessageDispatch.cpp +++ b/src/messaging/ExchangeMessageDispatch.cpp @@ -85,19 +85,7 @@ CHIP_ERROR ExchangeMessageDispatch::SendMessage(SessionManager * sessionManager, ReturnErrorOnFailure(sessionManager->PrepareMessage(session, payloadHeader, std::move(message), entryOwner->retainedBuf)); CHIP_ERROR err = sessionManager->SendPreparedMessage(session, entryOwner->retainedBuf); - if (err == CHIP_ERROR_POSIX(ENOBUFS)) - { - // sendmsg on BSD-based systems never blocks, no matter how the - // socket is configured, and will return ENOBUFS in situation in - // which Linux, for example, blocks. - // - // This is typically a transient situation, so we pretend like this - // packet drop happened somewhere on the network instead of inside - // sendmsg and will just resend it in the normal MRP way later. - ChipLogError(ExchangeManager, "Ignoring ENOBUFS: %" CHIP_ERROR_FORMAT " on exchange " ChipLogFormatExchangeId, - err.Format(), ChipLogValueExchangeId(exchangeId, isInitiator)); - err = CHIP_NO_ERROR; - } + err = ReliableMessageMgr::MapSendError(err, exchangeId, isInitiator); ReturnErrorOnFailure(err); reliableMessageMgr->StartRetransmision(entryOwner.release()); } diff --git a/src/messaging/ReliableMessageMgr.cpp b/src/messaging/ReliableMessageMgr.cpp index 9712a6b8175123..673b59de171d4c 100644 --- a/src/messaging/ReliableMessageMgr.cpp +++ b/src/messaging/ReliableMessageMgr.cpp @@ -311,6 +311,7 @@ CHIP_ERROR ReliableMessageMgr::SendFromRetransTable(RetransTableEntry * entry) auto * sessionManager = entry->ec->GetExchangeMgr()->GetSessionManager(); CHIP_ERROR err = sessionManager->SendPreparedMessage(entry->ec->GetSessionHandle(), entry->retainedBuf); + err = MapSendError(err, entry->ec->GetExchangeId(), entry->ec->IsInitiator()); if (err == CHIP_NO_ERROR) { @@ -416,6 +417,32 @@ void ReliableMessageMgr::RegisterSessionUpdateDelegate(SessionUpdateDelegate * s mSessionUpdateDelegate = sessionUpdateDelegate; } +CHIP_ERROR ReliableMessageMgr::MapSendError(CHIP_ERROR error, uint16_t exchangeId, bool isInitiator) +{ + if (error == CHIP_ERROR_POSIX(ENOBUFS) +#if CHIP_SYSTEM_CONFIG_USE_LWIP + || error == System::MapErrorLwIP(ERR_MEM) +#endif // CHIP_SYSTEM_CONFIG_USE_LWIP + ) + { + // sendmsg on BSD-based systems never blocks, no matter how the + // socket is configured, and will return ENOBUFS in situation in + // which Linux, for example, blocks. + // + // This is typically a transient situation, so we pretend like this + // packet drop happened somewhere on the network instead of inside + // sendmsg and will just resend it in the normal MRP way later. + // + // Similarly, on LwIP an ERR_MEM on send indicates a likely + // temporary lack of TX buffers. + ChipLogError(ExchangeManager, "Ignoring transient send error: %" CHIP_ERROR_FORMAT " on exchange " ChipLogFormatExchangeId, + error.Format(), ChipLogValueExchangeId(exchangeId, isInitiator)); + error = CHIP_NO_ERROR; + } + + return error; +} + #if CHIP_CONFIG_TEST int ReliableMessageMgr::TestGetCountRetransTable() { diff --git a/src/messaging/ReliableMessageMgr.h b/src/messaging/ReliableMessageMgr.h index 9b9809f2e7f055..c50ca12c275983 100644 --- a/src/messaging/ReliableMessageMgr.h +++ b/src/messaging/ReliableMessageMgr.h @@ -182,6 +182,13 @@ class ReliableMessageMgr */ void RegisterSessionUpdateDelegate(SessionUpdateDelegate * sessionUpdateDelegate); + /** + * Map a send error code to the error code we should actually use for + * success checks. This maps some error codes to CHIP_NO_ERROR as + * appropriate. + */ + static CHIP_ERROR MapSendError(CHIP_ERROR error, uint16_t exchangeId, bool isInitiator); + #if CHIP_CONFIG_TEST // Functions for testing int TestGetCountRetransTable(); From d1f3d216e86b41d05ceece809e6148510de12fe7 Mon Sep 17 00:00:00 2001 From: Boris Zbarsky Date: Tue, 6 Sep 2022 13:13:43 -0400 Subject: [PATCH 2/2] Address review comment. --- src/messaging/ReliableMessageMgr.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/messaging/ReliableMessageMgr.cpp b/src/messaging/ReliableMessageMgr.cpp index 673b59de171d4c..b870e6becc2287 100644 --- a/src/messaging/ReliableMessageMgr.cpp +++ b/src/messaging/ReliableMessageMgr.cpp @@ -419,9 +419,11 @@ void ReliableMessageMgr::RegisterSessionUpdateDelegate(SessionUpdateDelegate * s CHIP_ERROR ReliableMessageMgr::MapSendError(CHIP_ERROR error, uint16_t exchangeId, bool isInitiator) { - if (error == CHIP_ERROR_POSIX(ENOBUFS) + if ( #if CHIP_SYSTEM_CONFIG_USE_LWIP - || error == System::MapErrorLwIP(ERR_MEM) + error == System::MapErrorLwIP(ERR_MEM) +#else + error == CHIP_ERROR_POSIX(ENOBUFS) #endif // CHIP_SYSTEM_CONFIG_USE_LWIP ) {