Skip to content

Commit cd95972

Browse files
authored
Fix issue #13341 ARP entry can be out of sync between kernel and APPL_DB if multiple updates are received from RTNL (#2619)
- What I did Fix issue #13341 the issue that ARP entry is out of sync between kernel and APPL_DB In AppRestartAssist::insertToMap, in case an entry has been updated more than once with the same value but different from the stored one, keep the state as NEW. Eg. Assume the entry's value that is restored from the warm reboot is V0, the following events received. The first update with value V1 is received and handled by the if (found != appTableCacheMap[tableName].end()) branch, * the state is set to NEW * value is updated to V1 The second update with the same value V1 is received and handled by this branch Originally, the state was set to SAME, which is wrong because V1 is different from the stored value V0 The correct logic should be: set the state to SAME only if the state is not NEW This is a very rare case because most of times, the entry won't be updated multiple times - Why I did it To fix the issue. - How I verified it Mock test is added to cover the case. Signed-off-by: Stephen Sun <[email protected]>
1 parent a01470f commit cd95972

File tree

3 files changed

+93
-6
lines changed

3 files changed

+93
-6
lines changed

tests/mock_tests/Makefile.am

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ LDADD_GTEST = -L/usr/src/gtest
2121

2222
## Orchagent Unit Tests
2323

24-
tests_INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/orchagent -I$(P4_ORCH_DIR)/tests
24+
tests_INCLUDES = -I $(FLEX_CTR_DIR) -I $(DEBUG_CTR_DIR) -I $(top_srcdir)/lib -I$(top_srcdir)/cfgmgr -I$(top_srcdir)/orchagent -I$(P4_ORCH_DIR)/tests -I$(top_srcdir)/warmrestart
2525

2626
tests_SOURCES = aclorch_ut.cpp \
2727
portsorch_ut.cpp \
@@ -49,6 +49,7 @@ tests_SOURCES = aclorch_ut.cpp \
4949
swssnet_ut.cpp \
5050
flowcounterrouteorch_ut.cpp \
5151
orchdaemon_ut.cpp \
52+
warmrestartassist_ut.cpp \
5253
$(top_srcdir)/lib/gearboxutils.cpp \
5354
$(top_srcdir)/lib/subintf.cpp \
5455
$(top_srcdir)/orchagent/orchdaemon.cpp \
@@ -105,7 +106,8 @@ tests_SOURCES = aclorch_ut.cpp \
105106
$(top_srcdir)/orchagent/srv6orch.cpp \
106107
$(top_srcdir)/orchagent/nvgreorch.cpp \
107108
$(top_srcdir)/cfgmgr/portmgr.cpp \
108-
$(top_srcdir)/cfgmgr/buffermgrdyn.cpp
109+
$(top_srcdir)/cfgmgr/buffermgrdyn.cpp \
110+
$(top_srcdir)/warmrestart/warmRestartAssist.cpp
109111

110112
tests_SOURCES += $(FLEX_CTR_DIR)/flex_counter_manager.cpp $(FLEX_CTR_DIR)/flex_counter_stat_manager.cpp $(FLEX_CTR_DIR)/flow_counter_handler.cpp $(FLEX_CTR_DIR)/flowcounterrouteorch.cpp
111113
tests_SOURCES += $(DEBUG_CTR_DIR)/debug_counter.cpp $(DEBUG_CTR_DIR)/drop_counter.cpp
+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#define protected public
2+
#include "orch.h"
3+
#undef protected
4+
#include "ut_helper.h"
5+
//#include "mock_orchagent_main.h"
6+
#include "mock_table.h"
7+
#include "warm_restart.h"
8+
#define private public
9+
#include "warmRestartAssist.h"
10+
#undef private
11+
12+
#define APP_WRA_TEST_TABLE_NAME "TEST_TABLE"
13+
14+
namespace warmrestartassist_test
15+
{
16+
using namespace std;
17+
18+
shared_ptr<swss::DBConnector> m_app_db = make_shared<swss::DBConnector>("APPL_DB", 0);
19+
shared_ptr<swss::RedisPipeline> m_app_db_pipeline = make_shared<swss::RedisPipeline>(m_app_db.get());
20+
shared_ptr<swss::ProducerStateTable> m_wra_test_table = make_shared<swss::ProducerStateTable>(m_app_db.get(), APP_WRA_TEST_TABLE_NAME);
21+
22+
AppRestartAssist *appRestartAssist;
23+
24+
struct WarmrestartassistTest : public ::testing::Test
25+
{
26+
WarmrestartassistTest()
27+
{
28+
appRestartAssist = new AppRestartAssist(m_app_db_pipeline.get(), "testsyncd", "swss", 0);
29+
appRestartAssist->m_warmStartInProgress = true;
30+
appRestartAssist->registerAppTable(APP_WRA_TEST_TABLE_NAME, m_wra_test_table.get());
31+
}
32+
33+
void SetUp() override
34+
{
35+
testing_db::reset();
36+
37+
Table testTable = Table(m_app_db.get(), APP_WRA_TEST_TABLE_NAME);
38+
testTable.set("key",
39+
{
40+
{"field", "value0"},
41+
});
42+
}
43+
44+
void TearDown() override
45+
{
46+
}
47+
};
48+
49+
TEST_F(WarmrestartassistTest, warmRestartAssistTest)
50+
{
51+
appRestartAssist->readTablesToMap();
52+
vector<FieldValueTuple> fvVector;
53+
fvVector.emplace_back("field", "value1");
54+
appRestartAssist->insertToMap(APP_WRA_TEST_TABLE_NAME, "key", fvVector, false);
55+
appRestartAssist->insertToMap(APP_WRA_TEST_TABLE_NAME, "key", fvVector, false);
56+
appRestartAssist->reconcile();
57+
58+
fvVector.clear();
59+
Table testTable = Table(m_app_db.get(), APP_WRA_TEST_TABLE_NAME);
60+
ASSERT_TRUE(testTable.get("key", fvVector));
61+
ASSERT_EQ(fvField(fvVector[0]), "field");
62+
ASSERT_EQ(fvValue(fvVector[0]), "value1");
63+
}
64+
}

warmrestart/warmRestartAssist.cpp

+25-4
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,31 @@ void AppRestartAssist::insertToMap(string tableName, string key, vector<FieldVal
208208
}
209209
else
210210
{
211-
SWSS_LOG_INFO("%s, found key: %s, same value", tableName.c_str(), key.c_str());
212-
213-
// mark as SAME flag
214-
setCacheEntryState(found->second, SAME);
211+
auto state = getCacheEntryState(found->second);
212+
/*
213+
* In case an entry has been updated for more than once with the same value but different from the stored one,
214+
* keep the state as NEW.
215+
* Eg.
216+
* Assume the entry's value that is restored from last warm reboot is V0.
217+
* 1. The first update with value V1 is received and handled by the above `if (found != appTableCacheMap[tableName].end())` branch,
218+
* - state is set to NEW
219+
* - value is updated to V1
220+
* 2. The second update with the same value V1 is received and handled by this branch
221+
* - Originally, state was set to SAME, which is wrong because V1 is different from the stored value V0
222+
* - The correct logic should be: set the state to same only if the state is not NEW
223+
* This is a very rare case because in most of times the entry won't be updated for multiple times
224+
*/
225+
if (state == NEW)
226+
{
227+
SWSS_LOG_NOTICE("%s, found key: %s, it has been updated for the second time, keep state as NEW",
228+
tableName.c_str(), key.c_str());
229+
}
230+
else
231+
{
232+
SWSS_LOG_INFO("%s, found key: %s, same value", tableName.c_str(), key.c_str());
233+
// mark as SAME flag
234+
setCacheEntryState(found->second, SAME);
235+
}
215236
}
216237
}
217238
else

0 commit comments

Comments
 (0)