Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

give a more useful error message if remote init fails #5720

Merged
merged 3 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions cylc/flow/subprocpool.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
get_platform,
)
from cylc.flow.task_events_mgr import TaskJobLogsRetrieveContext
from cylc.flow.task_proxy import TaskProxy
from cylc.flow.wallclock import get_current_time_string

_XTRIG_FUNCS: dict = {}
Expand Down Expand Up @@ -468,7 +469,7 @@ def _run_command_exit(
Args:
ctx: SubProcContext object for this task.
callback: Function to run on command exit.
callback_args: Arguments to proivide to callback
callback_args: Arguments to provide to callback
callback_255: Function to run if command exits with a 255
error - usually associated with ssh being unable to
contact a remote host.
Expand All @@ -486,6 +487,7 @@ def _run_callback(callback, args_=None):

# If cmd is fileinstall, which uses rsync, get a platform so
# that you can use that platform's ssh command.
platform_name = None
platform = None
if isinstance(ctx.cmd_key, TaskJobLogsRetrieveContext):
try:
Expand All @@ -505,6 +507,15 @@ def _run_callback(callback, args_=None):
):
# the first argument is not a platform
platform = None
# Backup, get a platform name from the config:
for arg in callback_args:
if isinstance(arg, TaskProxy):
platform_name = arg.tdef.rtconfig['platform']
elif (
isinstance(arg, list)
and isinstance(arg[0], TaskProxy)
):
platform_name = arg[0].tdef.rtconfig['platform']

if cls.ssh_255_fail(ctx) or cls.rsync_255_fail(ctx, platform) is True:
# Job log retrieval passes a special object as a command key
Expand All @@ -521,7 +532,7 @@ def _run_callback(callback, args_=None):
' unreachable hosts'
f'\n* {cmd_key} will retry if another host is available.'
),
platform or {'name': None},
platform or {'name': platform_name},
level='warning',
)

Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_subprocpool.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,12 @@
from types import SimpleNamespace

from cylc.flow import LOG
from cylc.flow.id import Tokens
from cylc.flow.cycling.iso8601 import ISO8601Point
from cylc.flow.task_events_mgr import TaskJobLogsRetrieveContext
from cylc.flow.subprocctx import SubProcContext
from cylc.flow.subprocpool import SubProcPool, _XTRIG_FUNCS, get_func
from cylc.flow.task_proxy import TaskProxy


class TestSubProcPool(unittest.TestCase):
Expand Down Expand Up @@ -312,6 +315,32 @@ def test__run_command_exit_add_to_badhosts(mock_ctx):
assert badhosts == {'foo', 'bar', 'mouse'}


def test__run_command_exit_add_to_badhosts_log(caplog, mock_ctx):
    """It names the platform found in the callback args when logging.

    A task proxy whose runtime config sets ``platform`` to ``foo`` is
    passed as the only callback argument; the first captured log record
    must then mention ``platform: foo``.
    """
    badhosts = {'foo', 'bar'}
    ctx = mock_ctx(cmd=['ssh'])

    # Bare-bones stand-in for a task definition: only the attributes
    # needed to build the TaskProxy plus the rtconfig platform entry.
    taskdef = SimpleNamespace(
        name='t',
        dependencies={},
        sequential='',
        external_triggers=[],
        xtrig_labels={},
        outputs={
            'submitted': [None, None],
            'submit-failed': [None, None],
        },
        graph_children={},
        rtconfig={'platform': 'foo'},
    )
    itask = TaskProxy(Tokens('~u/w//c/t/2'), taskdef, ISO8601Point('1990'))

    SubProcPool._run_command_exit(
        ctx,
        bad_hosts=badhosts,
        callback=lambda x, t: print(str(x)),
        callback_args=[itask],
    )

    first_message = caplog.records[0].message
    assert 'platform: foo' in first_message
    # The extra host is presumably the one carried by the mock_ctx
    # fixture (defined outside this view) -- confirm against the fixture.
    assert badhosts == {'foo', 'bar', 'mouse'}


def test__run_command_exit_rsync_fails(mock_ctx):
"""It updates the list of badhosts
"""
Expand Down