Skip to content

Commit e329dbd

Browse files
authored
Survive pfc watchdog storm action across warm-reboot (#794)
* Survive PFC watchdog and storm action in warm-reboot. Signed-off-by: Wenda Ni <[email protected]>
* Remove logs used for debugging. Signed-off-by: Wenda Ni <[email protected]>
* Add queue index check before taking storm action during warm-reboot. Signed-off-by: Wenda Ni <[email protected]>
* Correct log message. Signed-off-by: Wenda Ni <[email protected]>
* Log storm event for all storm actions, not only the drop action. Signed-off-by: Wenda Ni <[email protected]>
* Address review comments. Signed-off-by: Wenda Ni <[email protected]>
* Address the situation that stoi() may throw an exception. Signed-off-by: Wenda Ni <[email protected]>
* Fine-grained handling of stoi exceptions. Signed-off-by: Wenda Ni <[email protected]>
* Shift temporarily to STATE_DB. Signed-off-by: Wenda Ni <[email protected]>
* Add debugging symbols. Signed-off-by: Wenda Ni <[email protected]>
* Revert "Shift temporarily to STATE_DB". This reverts commit 1027cc12e22aa201bed59f0ed8cd83cc7ad7ef8d.
* Orthogonalize pfc wd table names. Signed-off-by: Wenda Ni <[email protected]>
* Implement doTask for the new Consumer, which subscribes to APPL_DB PFC_WD_TABLE keyspace. Signed-off-by: Wenda Ni <[email protected]>
* Clean up and touch-ups. Signed-off-by: Wenda Ni <[email protected]>
* Delete multiple fields in one hdel call. Signed-off-by: Wenda Ni <[email protected]>
* Refactor code with multi-field hdel. Signed-off-by: Wenda Ni <[email protected]>
* Address comments: remove unnecessary catch blocks for stoi() call. Signed-off-by: Wenda Ni <[email protected]>
* Use RedisClient to do hset (previously through Table hset). Signed-off-by: Wenda Ni <[email protected]>
* Remove debugging symbols. Signed-off-by: Wenda Ni <[email protected]>
* Address review comments: Replace PfcWdSwOrch<DropHandler, ForwardHandler>:: with this to shorten the code length. Signed-off-by: Wenda Ni <[email protected]>
* Address review comments: Refactor existing code to replace PfcWdSwOrch<DropHandler, ForwardHandler>:: with this to shorten the code length. Signed-off-by: Wenda Ni <[email protected]>
* Remove unused variable to correct compile error. Signed-off-by: Wenda Ni <[email protected]>
1 parent aa92326 commit e329dbd

6 files changed

+237
-53
lines changed

orchagent/pfc_detect_barefoot.lua

+2-2
Original file line number | Diff line number | Diff line change
@@ -65,15 +65,15 @@ for i = n, 1, -1 do
6565
-- DEBUG CODE END.
6666
(occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') then
6767
if time_left <= poll_time then
68-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","storm"]')
68+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
6969
is_deadlock = true
7070
time_left = detection_time
7171
else
7272
time_left = time_left - poll_time
7373
end
7474
else
7575
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
76-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
76+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
7777
end
7878
time_left = detection_time
7979
end

orchagent/pfc_detect_broadcom.lua

+2-2
Original file line number | Diff line number | Diff line change
@@ -73,15 +73,15 @@ for i = n, 1, -1 do
7373
-- DEBUG CODE END.
7474
(occupancy_bytes == 0 and pfc_rx_packets - pfc_rx_packets_last > 0 and pfc_on2off - pfc_on2off_last == 0 and queue_pause_status_last == 'true' and queue_pause_status == 'true') then
7575
if time_left <= poll_time then
76-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","storm"]')
76+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
7777
is_deadlock = true
7878
time_left = detection_time
7979
else
8080
time_left = time_left - poll_time
8181
end
8282
else
8383
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
84-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
84+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
8585
end
8686
time_left = detection_time
8787
end

orchagent/pfc_detect_mellanox.lua

+2-2
Original file line number | Diff line number | Diff line change
@@ -73,15 +73,15 @@ for i = n, 1, -1 do
7373
if time_left <= poll_time then
7474
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_rx_pkt_key .. '_last')
7575
redis.call('HDEL', counters_table_name .. ':' .. port_id, pfc_duration_key .. '_last')
76-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","storm"]')
76+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","storm"]')
7777
is_deadlock = true
7878
time_left = detection_time
7979
else
8080
time_left = time_left - poll_time
8181
end
8282
else
8383
if pfc_wd_action == 'alert' and pfc_wd_status ~= 'operational' then
84-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
84+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
8585
end
8686
time_left = detection_time
8787
end

orchagent/pfc_restore.lua

+1-1
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ for i = n, 1, -1 do
4949
-- DEBUG CODE END.
5050
then
5151
if time_left <= poll_time then
52-
redis.call('PUBLISH', 'PFC_WD', '["' .. KEYS[i] .. '","restore"]')
52+
redis.call('PUBLISH', 'PFC_WD_ACTION', '["' .. KEYS[i] .. '","restore"]')
5353
time_left = restoration_time
5454
else
5555
time_left = time_left - poll_time

0 commit comments

Comments
 (0)