Skip to content

Commit ed818f8

Browse files
authored
[PSU daemon] Support PSU power threshold checking (sonic-net#288)
1 parent 707a720 commit ed818f8

File tree

3 files changed

+213
-12
lines changed

3 files changed

+213
-12
lines changed

sonic-psud/scripts/psud

+51-1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ PSU_INFO_VOLTAGE_MAX_TH_FIELD = 'voltage_max_threshold'
5757
PSU_INFO_VOLTAGE_MIN_TH_FIELD = 'voltage_min_threshold'
5858
PSU_INFO_CURRENT_FIELD = 'current'
5959
PSU_INFO_POWER_FIELD = 'power'
60+
PSU_INFO_POWER_OVERLOAD = 'power_overload'
61+
PSU_INFO_POWER_WARNING_SUPPRESS_THRESHOLD = 'power_warning_suppress_threshold'
62+
PSU_INFO_POWER_CRITICAL_THRESHOLD = 'power_critical_threshold'
6063
PSU_INFO_FRU_FIELD = 'is_replaceable'
6164
PSU_INFO_IN_VOLTAGE_FIELD = 'input_voltage'
6265
PSU_INFO_IN_CURRENT_FIELD = 'input_current'
@@ -283,6 +286,8 @@ class PsuStatus(object):
283286
self.power_good = True
284287
self.voltage_good = True
285288
self.temperature_good = True
289+
self.check_psu_power_threshold = False
290+
self.power_exceeded_threshold = False
286291
self.logger = logger
287292

288293
def set_presence(self, presence):
@@ -339,6 +344,13 @@ class PsuStatus(object):
339344
self.temperature_good = temperature_good
340345
return True
341346

347+
def set_power_exceed_threshold(self, power_exceeded_threshold):
348+
if power_exceeded_threshold == self.power_exceeded_threshold:
349+
return False
350+
351+
self.power_exceeded_threshold = power_exceeded_threshold
352+
return True
353+
342354
def is_ok(self):
343355
return self.presence and self.power_good and self.voltage_good and self.temperature_good
344356

@@ -486,6 +498,8 @@ class DaemonPsud(daemon_base.DaemonBase):
486498
'PSU absence warning cleared: {} is inserted back.'.format(name),
487499
'PSU absence warning: {} is not present.'.format(name)
488500
)
501+
if not psu_status.presence:
502+
psu_status.check_psu_power_threshold = False
489503

490504
if presence_changed or self.first_run:
491505
# Have to update PSU fan data here because PSU presence status changed. If we don't
@@ -495,13 +509,46 @@ class DaemonPsud(daemon_base.DaemonBase):
495509
# every 60 seconds, it may still treat PSU state to "OK" and PSU LED to "red".
496510
self._update_psu_fan_data(psu, index)
497511

498-
if presence and psu_status.set_power_good(power_good):
512+
power_good_changed = psu_status.set_power_good(power_good)
513+
if presence and power_good_changed:
499514
set_led = True
500515
log_on_status_changed(self, psu_status.power_good,
501516
'Power absence warning cleared: {} power is back to normal.'.format(name),
502517
'Power absence warning: {} is out of power.'.format(name)
503518
)
504519

520+
if presence and power_good_changed or self.first_run:
521+
psu_status.check_psu_power_threshold = False
522+
if psu_status.power_good:
523+
# power_good is True here: it either just changed from False or this is the first run
524+
# Initialize power exceeding threshold state in this case
525+
if (try_get(psu.get_psu_power_critical_threshold) and try_get(psu.get_psu_power_warning_suppress_threshold) and power != NOT_AVAILABLE):
526+
psu_status.check_psu_power_threshold = True
527+
528+
power_exceeded_threshold = psu_status.power_exceeded_threshold
529+
power_warning_suppress_threshold = try_get(psu.get_psu_power_warning_suppress_threshold, NOT_AVAILABLE)
530+
power_critical_threshold = try_get(psu.get_psu_power_critical_threshold, NOT_AVAILABLE)
531+
if psu_status.check_psu_power_threshold:
532+
if power_warning_suppress_threshold == NOT_AVAILABLE or power_critical_threshold == NOT_AVAILABLE:
533+
self.log_error("PSU power thresholds become invalid: threshold {} critical threshold {}".format(power_warning_suppress_threshold, power_critical_threshold))
534+
psu_status.check_psu_power_threshold = False
535+
psu_status.power_exceeded_threshold = False
536+
elif psu_status.power_exceeded_threshold:
537+
# The falling threshold is the warning suppress threshold
538+
if power < power_warning_suppress_threshold:
539+
# Clear alarm
540+
power_exceeded_threshold = False
541+
else:
542+
# The rising threshold is the critical threshold
543+
if power >= power_critical_threshold:
544+
# Raise alarm
545+
power_exceeded_threshold = True
546+
547+
if psu_status.set_power_exceed_threshold(power_exceeded_threshold):
548+
log_on_status_changed(self, not psu_status.power_exceeded_threshold,
549+
'PSU power warning cleared: {} power {} is back to normal.'.format(name, power),
550+
'PSU power warning: {} power {} exceeds critical threshold {}.'.format(name, power, power_critical_threshold))
551+
505552
if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold):
506553
set_led = True
507554
log_on_status_changed(self, psu_status.voltage_good,
@@ -532,6 +579,9 @@ class DaemonPsud(daemon_base.DaemonBase):
532579
(PSU_INFO_VOLTAGE_MAX_TH_FIELD, str(voltage_high_threshold)),
533580
(PSU_INFO_CURRENT_FIELD, str(current)),
534581
(PSU_INFO_POWER_FIELD, str(power)),
582+
(PSU_INFO_POWER_WARNING_SUPPRESS_THRESHOLD, str(power_warning_suppress_threshold)),
583+
(PSU_INFO_POWER_CRITICAL_THRESHOLD, str(power_critical_threshold)),
584+
(PSU_INFO_POWER_OVERLOAD, str(power_exceeded_threshold)),
535585
(PSU_INFO_FRU_FIELD, str(is_replaceable)),
536586
(PSU_INFO_IN_CURRENT_FIELD, str(in_current)),
537587
(PSU_INFO_IN_VOLTAGE_FIELD, str(in_voltage)),

sonic-psud/tests/mock_platform.py

+6
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,12 @@ def set_status_led(self, color):
356356
self._status_led_color = color
357357
return True
358358

359+
def get_psu_power_critical_threshold(self):
360+
raise NotImplementedError
361+
362+
def get_psu_power_warning_suppress_threshold(self):
363+
raise NotImplementedError
364+
359365
# Methods inherited from DeviceBase class and related setters
360366
def get_name(self):
361367
return self._name

sonic-psud/tests/test_DaemonPsud.py

+156-11
Original file line numberDiff line numberDiff line change
@@ -143,16 +143,7 @@ def test_update_psu_data(self):
143143
expected_calls = [mock.call("Failed to update PSU data - Test message")] * 2
144144
assert daemon_psud.log_warning.mock_calls == expected_calls
145145

146-
@mock.patch('psud._wrapper_get_psu_presence', mock.MagicMock())
147-
@mock.patch('psud._wrapper_get_psu_status', mock.MagicMock())
148-
def test_update_single_psu_data(self):
149-
psud._wrapper_get_psu_presence.return_value = True
150-
psud._wrapper_get_psu_status.return_value = True
151-
152-
psu1 = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234')
153-
psud.platform_chassis = MockChassis()
154-
psud.platform_chassis._psu_list.append(psu1)
155-
146+
def _construct_expected_fvp(self, power=100.0, power_warning_suppress_threshold='N/A', power_critical_threshold='N/A', power_overload=False):
156147
expected_fvp = psud.swsscommon.FieldValuePairs(
157148
[(psud.PSU_INFO_MODEL_FIELD, 'Fake Model'),
158149
(psud.PSU_INFO_SERIAL_FIELD, '12345678'),
@@ -163,17 +154,171 @@ def test_update_single_psu_data(self):
163154
(psud.PSU_INFO_VOLTAGE_MIN_TH_FIELD, '11.0'),
164155
(psud.PSU_INFO_VOLTAGE_MAX_TH_FIELD, '13.0'),
165156
(psud.PSU_INFO_CURRENT_FIELD, '8.0'),
166-
(psud.PSU_INFO_POWER_FIELD, '100.0'),
157+
(psud.PSU_INFO_POWER_FIELD, str(power)),
158+
(psud.PSU_INFO_POWER_WARNING_SUPPRESS_THRESHOLD, str(power_warning_suppress_threshold)),
159+
(psud.PSU_INFO_POWER_CRITICAL_THRESHOLD, str(power_critical_threshold)),
160+
(psud.PSU_INFO_POWER_OVERLOAD, str(power_overload)),
167161
(psud.PSU_INFO_FRU_FIELD, 'True'),
168162
(psud.PSU_INFO_IN_VOLTAGE_FIELD, '220.25'),
169163
(psud.PSU_INFO_IN_CURRENT_FIELD, '0.72'),
170164
(psud.PSU_INFO_POWER_MAX_FIELD, 'N/A'),
171165
])
166+
return expected_fvp
167+
168+
@mock.patch('psud._wrapper_get_psu_presence', mock.MagicMock())
169+
@mock.patch('psud._wrapper_get_psu_status', mock.MagicMock())
170+
def test_update_single_psu_data(self):
171+
psud._wrapper_get_psu_presence.return_value = True
172+
psud._wrapper_get_psu_status.return_value = True
173+
174+
psu1 = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234')
175+
psud.platform_chassis = MockChassis()
176+
psud.platform_chassis._psu_list.append(psu1)
177+
178+
expected_fvp = self._construct_expected_fvp()
172179

173180
daemon_psud = psud.DaemonPsud(SYSLOG_IDENTIFIER)
174181
daemon_psud.psu_tbl = mock.MagicMock()
175182
daemon_psud._update_single_psu_data(1, psu1)
176183
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
184+
assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold
185+
186+
@mock.patch('psud.daemon_base.db_connect', mock.MagicMock())
187+
def test_power_threshold(self):
188+
psu = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234')
189+
psud.platform_chassis = MockChassis()
190+
psud.platform_chassis._psu_list.append(psu)
191+
192+
daemon_psud = psud.DaemonPsud(SYSLOG_IDENTIFIER)
193+
194+
daemon_psud.psu_tbl = mock.MagicMock()
195+
psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=120.0)
196+
psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(return_value=110.0)
197+
198+
# Normal start. All good and all thresholds are supported
199+
# Power is in normal range (below warning threshold)
200+
daemon_psud._update_single_psu_data(1, psu)
201+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
202+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
203+
expected_fvp = self._construct_expected_fvp(100.0, 110.0, 120.0, False)
204+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
205+
daemon_psud._update_led_color()
206+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
207+
208+
daemon_psud.first_run = False
209+
210+
# Power is increasing across the warning threshold
211+
# Normal => (warning, critical)
212+
psu.set_power(115.0)
213+
daemon_psud._update_single_psu_data(1, psu)
214+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
215+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
216+
expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, False)
217+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
218+
daemon_psud._update_led_color()
219+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
220+
221+
# Power is increasing across the critical threshold. Alarm raised
222+
# (warning, critical) => (critical, )
223+
psu.set_power(125.0)
224+
daemon_psud._update_single_psu_data(1, psu)
225+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
226+
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
227+
expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True)
228+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
229+
daemon_psud._update_led_color()
230+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
231+
232+
# Power is decreasing across the critical threshold. Alarm not cleared
233+
# (critical, ) => (warning, critical)
234+
psu.set_power(115.0)
235+
daemon_psud._update_single_psu_data(1, psu)
236+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
237+
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
238+
expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, True)
239+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
240+
daemon_psud._update_led_color()
241+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
242+
243+
# Power is decreasing across the warning threshold. Alarm cleared
244+
# (warning, critical) => Normal
245+
psu.set_power(105.0)
246+
daemon_psud._update_single_psu_data(1, psu)
247+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
248+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
249+
expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False)
250+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
251+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
252+
daemon_psud._update_led_color()
253+
254+
# Power is increasing across the critical threshold. Alarm raised
255+
# Normal => (critical, )
256+
psu.set_power(125.0)
257+
daemon_psud._update_single_psu_data(1, psu)
258+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
259+
assert daemon_psud.psu_status_dict[1].power_exceeded_threshold
260+
expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True)
261+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
262+
daemon_psud._update_led_color()
263+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
264+
265+
# Power is decreasing across both the critical and warning thresholds. Alarm cleared
266+
# (critical, ) => Normal
267+
psu.set_power(105.0)
268+
daemon_psud._update_single_psu_data(1, psu)
269+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
270+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
271+
expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False)
272+
daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp)
273+
daemon_psud._update_led_color()
274+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
275+
276+
# PSU power goes down
277+
psu.set_status(False)
278+
daemon_psud._update_single_psu_data(1, psu)
279+
daemon_psud._update_led_color()
280+
assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold
281+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
282+
assert psu.STATUS_LED_COLOR_RED == psu.get_status_led()
283+
284+
# PSU power comes back up
285+
psu.set_status(True)
286+
daemon_psud._update_single_psu_data(1, psu)
287+
daemon_psud._update_led_color()
288+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
289+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
290+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
291+
292+
# PSU becomes absent
293+
psu.set_presence(False)
294+
daemon_psud._update_single_psu_data(1, psu)
295+
daemon_psud._update_led_color()
296+
assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold
297+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
298+
assert psu.STATUS_LED_COLOR_RED == psu.get_status_led()
299+
300+
# PSU becomes present
301+
psu.set_presence(True)
302+
daemon_psud._update_single_psu_data(1, psu)
303+
daemon_psud._update_led_color()
304+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
305+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
306+
assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led()
307+
308+
# Thresholds become invalid on the fly
309+
psu.get_psu_power_critical_threshold = mock.MagicMock(side_effect=NotImplementedError(''))
310+
daemon_psud._update_single_psu_data(1, psu)
311+
assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold
312+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
313+
psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=120.0)
314+
daemon_psud.psu_status_dict[1].check_psu_power_threshold = True
315+
daemon_psud._update_single_psu_data(1, psu)
316+
assert daemon_psud.psu_status_dict[1].check_psu_power_threshold
317+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
318+
psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(side_effect=NotImplementedError(''))
319+
daemon_psud._update_single_psu_data(1, psu)
320+
assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold
321+
assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold
177322

178323
def test_set_psu_led(self):
179324
mock_logger = mock.MagicMock()

0 commit comments

Comments
 (0)