Skip to content

Commit 8b3bc18

Browse files
[reload] Improve reload by using sonic.target. (sonic-net#1199)
- What I did: Removed the hardcoded, order-dependent lists of services to stop/restart/reset-failed.
- How I did it: Used sonic.target to stop/restart/reset-failed the services.
- How to verify it: Execute `config reload` and observe that the services restart.

Signed-off-by: Stepan Blyshchak <[email protected]>
1 parent 99673bc commit 8b3bc18

File tree

2 files changed

+33
-203
lines changed

2 files changed

+33
-203
lines changed

config/main.py

+26-150
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import re
1010
import subprocess
1111
import sys
12-
import threading
1312
import time
1413

1514
from socket import AF_INET, AF_INET6
@@ -63,10 +62,6 @@
6362

6463
INIT_CFG_FILE = '/etc/sonic/init_cfg.json'
6564

66-
SYSTEMCTL_ACTION_STOP="stop"
67-
SYSTEMCTL_ACTION_RESTART="restart"
68-
SYSTEMCTL_ACTION_RESET_FAILED="reset-failed"
69-
7065
DEFAULT_NAMESPACE = ''
7166
CFG_LOOPBACK_PREFIX = "Loopback"
7267
CFG_LOOPBACK_PREFIX_LEN = len(CFG_LOOPBACK_PREFIX)
@@ -224,54 +219,6 @@ def breakout_Ports(cm, delPorts=list(), portJson=dict(), force=False, \
224219
# Helper functions
225220
#
226221

227-
# Execute action per NPU instance for multi instance services.
228-
def execute_systemctl_per_asic_instance(inst, event, service, action):
229-
try:
230-
click.echo("Executing {} of service {}@{}...".format(action, service, inst))
231-
clicommon.run_command("systemctl {} {}@{}.service".format(action, service, inst))
232-
except SystemExit as e:
233-
log.log_error("Failed to execute {} of service {}@{} with error {}".format(action, service, inst, e))
234-
# Set the event object if there is a failure and exception was raised.
235-
event.set()
236-
237-
# Execute action on list of systemd services
238-
def execute_systemctl(list_of_services, action):
239-
num_asic = multi_asic.get_num_asics()
240-
generated_services_list, generated_multi_instance_services = _get_sonic_generated_services(num_asic)
241-
if ((generated_services_list == []) and
242-
(generated_multi_instance_services == [])):
243-
log.log_error("Failed to get generated services")
244-
return
245-
246-
for service in list_of_services:
247-
if (service + '.service' in generated_services_list):
248-
try:
249-
click.echo("Executing {} of service {}...".format(action, service))
250-
clicommon.run_command("systemctl {} {}".format(action, service))
251-
except SystemExit as e:
252-
log.log_error("Failed to execute {} of service {} with error {}".format(action, service, e))
253-
raise
254-
255-
if (service + '.service' in generated_multi_instance_services):
256-
# With Multi NPU, Start a thread per instance to do the "action" on multi instance services.
257-
if multi_asic.is_multi_asic():
258-
threads = []
259-
# Use this event object to co-ordinate if any threads raised exception
260-
e = threading.Event()
261-
262-
kwargs = {'service': service, 'action': action}
263-
for inst in range(num_asic):
264-
t = threading.Thread(target=execute_systemctl_per_asic_instance, args=(inst, e), kwargs=kwargs)
265-
threads.append(t)
266-
t.start()
267-
268-
# Wait for all the threads to finish.
269-
for inst in range(num_asic):
270-
threads[inst].join()
271-
272-
# Check if any of the threads have raised exception, if so exit the process.
273-
if e.is_set():
274-
sys.exit(1)
275222

276223
def _get_device_type():
277224
"""
@@ -720,97 +667,26 @@ def _get_disabled_services_list(config_db):
720667

721668
return disabled_services_list
722669

723-
def _stop_services(config_db):
724-
# This list is order-dependent. Please add services in the order they should be stopped
725-
# on Mellanox platform pmon is stopped by syncd
726-
services_to_stop = [
727-
'telemetry',
728-
'restapi',
729-
'swss',
730-
'lldp',
731-
'pmon',
732-
'bgp',
733-
'hostcfgd',
734-
'nat'
735-
]
736-
737-
if asic_type == 'mellanox' and 'pmon' in services_to_stop:
738-
services_to_stop.remove('pmon')
739-
740-
disabled_services = _get_disabled_services_list(config_db)
741-
742-
for service in disabled_services:
743-
if service in services_to_stop:
744-
services_to_stop.remove(service)
745-
746-
execute_systemctl(services_to_stop, SYSTEMCTL_ACTION_STOP)
747-
748-
749-
def _reset_failed_services(config_db):
750-
# This list is order-independent. Please keep list in alphabetical order
751-
services_to_reset = [
752-
'bgp',
753-
'dhcp_relay',
754-
'hostcfgd',
755-
'hostname-config',
756-
'interfaces-config',
757-
'lldp',
758-
'mux',
759-
'nat',
760-
'ntp-config',
761-
'pmon',
762-
'radv',
763-
'restapi',
764-
'rsyslog-config',
765-
'sflow',
766-
'snmp',
767-
'swss',
768-
'syncd',
769-
'teamd',
770-
'telemetry',
771-
'macsec',
772-
]
773-
774-
disabled_services = _get_disabled_services_list(config_db)
775-
776-
for service in disabled_services:
777-
if service in services_to_reset:
778-
services_to_reset.remove(service)
779-
780-
execute_systemctl(services_to_reset, SYSTEMCTL_ACTION_RESET_FAILED)
781-
782-
783-
def _restart_services(config_db):
784-
# This list is order-dependent. Please add services in the order they should be started
785-
# on Mellanox platform pmon is started by syncd
786-
services_to_restart = [
787-
'hostname-config',
788-
'interfaces-config',
789-
'ntp-config',
790-
'rsyslog-config',
791-
'swss',
792-
'mux',
793-
'bgp',
794-
'pmon',
795-
'lldp',
796-
'hostcfgd',
797-
'nat',
798-
'sflow',
799-
'restapi',
800-
'telemetry',
801-
'macsec',
802-
]
803-
804-
disabled_services = _get_disabled_services_list(config_db)
805-
806-
for service in disabled_services:
807-
if service in services_to_restart:
808-
services_to_restart.remove(service)
809-
810-
if asic_type == 'mellanox' and 'pmon' in services_to_restart:
811-
services_to_restart.remove('pmon')
812-
813-
execute_systemctl(services_to_restart, SYSTEMCTL_ACTION_RESTART)
670+
671+
def _stop_services():
672+
click.echo("Stopping SONiC target ...")
673+
clicommon.run_command("sudo systemctl stop sonic.target")
674+
675+
676+
def _get_sonic_services():
677+
out = clicommon.run_command("systemctl list-dependencies --plain sonic.target | sed '1d'", return_cmd=True)
678+
return [unit.strip() for unit in out.splitlines()]
679+
680+
681+
def _reset_failed_services():
682+
for service in _get_sonic_services():
683+
click.echo("Resetting failed status on {}".format(service))
684+
clicommon.run_command("systemctl reset-failed {}".format(service))
685+
686+
687+
def _restart_services():
688+
click.echo("Restarting SONiC target ...")
689+
clicommon.run_command("sudo systemctl restart sonic.target")
814690

815691
# Reload Monit configuration to pick up new hostname in case it changed
816692
click.echo("Reloading Monit configuration ...")
@@ -1115,7 +991,7 @@ def reload(db, filename, yes, load_sysinfo, no_service_restart):
1115991
#Stop services before config push
1116992
if not no_service_restart:
1117993
log.log_info("'reload' stopping services...")
1118-
_stop_services(db.cfgdb)
994+
_stop_services()
1119995

1120996
# In Single ASIC platforms we have single DB service. In multi-ASIC platforms we have a global DB
1121997
# service running in the host + DB services running in each ASIC namespace created per ASIC.
@@ -1186,9 +1062,9 @@ def reload(db, filename, yes, load_sysinfo, no_service_restart):
11861062
# We first run "systemctl reset-failed" to remove the "failed"
11871063
# status from all services before we attempt to restart them
11881064
if not no_service_restart:
1189-
_reset_failed_services(db.cfgdb)
1065+
_reset_failed_services()
11901066
log.log_info("'reload' restarting services...")
1191-
_restart_services(db.cfgdb)
1067+
_restart_services()
11921068

11931069
@config.command("load_mgmt_config")
11941070
@click.option('-y', '--yes', is_flag=True, callback=_abort_if_false,
@@ -1227,7 +1103,7 @@ def load_minigraph(db, no_service_restart):
12271103
#Stop services before config push
12281104
if not no_service_restart:
12291105
log.log_info("'load_minigraph' stopping services...")
1230-
_stop_services(db.cfgdb)
1106+
_stop_services()
12311107

12321108
# For Single Asic platform the namespace list has the empty string
12331109
# for multi Asic platform the empty string to generate the config
@@ -1283,10 +1159,10 @@ def load_minigraph(db, no_service_restart):
12831159
# We first run "systemctl reset-failed" to remove the "failed"
12841160
# status from all services before we attempt to restart them
12851161
if not no_service_restart:
1286-
_reset_failed_services(db.cfgdb)
1162+
_reset_failed_services()
12871163
#FIXME: After config DB daemon is implemented, we'll no longer need to restart every service.
12881164
log.log_info("'load_minigraph' restarting services...")
1289-
_restart_services(db.cfgdb)
1165+
_restart_services()
12901166
click.echo("Please note setting loaded from minigraph will be lost after system reboot. To preserve setting, run `config save`.")
12911167

12921168

tests/config_test.py

+7-53
Original file line numberDiff line numberDiff line change
@@ -12,53 +12,24 @@
1212
from utilities_common.db import Db
1313

1414
load_minigraph_command_output="""\
15-
Executing stop of service telemetry...
16-
Executing stop of service swss...
17-
Executing stop of service lldp...
18-
Executing stop of service pmon...
19-
Executing stop of service bgp...
20-
Executing stop of service hostcfgd...
21-
Executing stop of service nat...
15+
Stopping SONiC target ...
2216
Running command: /usr/local/bin/sonic-cfggen -H -m --write-to-db
2317
Running command: pfcwd start_default
2418
Running command: config qos reload --no-dynamic-buffer
25-
Executing reset-failed of service bgp...
26-
Executing reset-failed of service dhcp_relay...
27-
Executing reset-failed of service hostcfgd...
28-
Executing reset-failed of service hostname-config...
29-
Executing reset-failed of service interfaces-config...
30-
Executing reset-failed of service lldp...
31-
Executing reset-failed of service nat...
32-
Executing reset-failed of service ntp-config...
33-
Executing reset-failed of service pmon...
34-
Executing reset-failed of service radv...
35-
Executing reset-failed of service rsyslog-config...
36-
Executing reset-failed of service snmp...
37-
Executing reset-failed of service swss...
38-
Executing reset-failed of service syncd...
39-
Executing reset-failed of service teamd...
40-
Executing reset-failed of service telemetry...
41-
Executing restart of service hostname-config...
42-
Executing restart of service interfaces-config...
43-
Executing restart of service ntp-config...
44-
Executing restart of service rsyslog-config...
45-
Executing restart of service swss...
46-
Executing restart of service bgp...
47-
Executing restart of service pmon...
48-
Executing restart of service lldp...
49-
Executing restart of service hostcfgd...
50-
Executing restart of service nat...
51-
Executing restart of service telemetry...
19+
Restarting SONiC target ...
5220
Reloading Monit configuration ...
5321
Please note setting loaded from minigraph will be lost after system reboot. To preserve setting, run `config save`.
5422
"""
5523

5624
def mock_run_command_side_effect(*args, **kwargs):
5725
command = args[0]
5826

59-
if 'display_cmd' in kwargs and kwargs['display_cmd'] == True:
27+
if kwargs.get('display_cmd'):
6028
click.echo(click.style("Running command: ", fg='cyan') + click.style(command, fg='green'))
6129

30+
if kwargs.get('return_cmd'):
31+
return ''
32+
6233

6334
class TestLoadMinigraph(object):
6435
@classmethod
@@ -78,24 +49,7 @@ def test_load_minigraph(self, get_cmd_module, setup_single_broadcom_asic):
7849
traceback.print_tb(result.exc_info[2])
7950
assert result.exit_code == 0
8051
assert "\n".join([l.rstrip() for l in result.output.split('\n')]) == load_minigraph_command_output
81-
assert mock_run_command.call_count == 38
82-
83-
def test_load_minigraph_with_disabled_telemetry(self, get_cmd_module, setup_single_broadcom_asic):
84-
with mock.patch("utilities_common.cli.run_command", mock.MagicMock(side_effect=mock_run_command_side_effect)) as mock_run_command:
85-
(config, show) = get_cmd_module
86-
db = Db()
87-
runner = CliRunner()
88-
result = runner.invoke(config.config.commands["feature"].commands["state"], ["telemetry", "disabled"], obj=db)
89-
assert result.exit_code == 0
90-
result = runner.invoke(show.cli.commands["feature"].commands["status"], ["telemetry"], obj=db)
91-
print(result.output)
92-
assert result.exit_code == 0
93-
result = runner.invoke(config.config.commands["load_minigraph"], ["-y"], obj=db)
94-
print(result.exit_code)
95-
print(result.output)
96-
assert result.exit_code == 0
97-
assert "telemetry" not in result.output
98-
assert mock_run_command.call_count == 35
52+
assert mock_run_command.call_count == 7
9953

10054
@classmethod
10155
def teardown_class(cls):

0 commit comments

Comments
 (0)