71
71
TASK_STATUS_WAITING
72
72
)
73
73
from cylc .flow .task_outputs import (
74
- TASK_OUTPUT_SUBMITTED , TASK_OUTPUT_STARTED , TASK_OUTPUT_SUCCEEDED ,
75
- TASK_OUTPUT_FAILED , TASK_OUTPUT_SUBMIT_FAILED )
74
+ TASK_OUTPUT_EXPIRED ,
75
+ TASK_OUTPUT_SUBMITTED ,
76
+ TASK_OUTPUT_STARTED ,
77
+ TASK_OUTPUT_SUCCEEDED ,
78
+ TASK_OUTPUT_FAILED ,
79
+ TASK_OUTPUT_SUBMIT_FAILED
80
+ )
76
81
from cylc .flow .wallclock import (
77
82
get_current_time_string ,
78
83
get_seconds_as_interval_string as intvl_as_str
@@ -116,11 +121,15 @@ def log_task_job_activity(ctx, workflow, point, name, submit_num=None):
116
121
try :
117
122
with open (os .path .expandvars (job_activity_log ), "ab" ) as handle :
118
123
handle .write ((ctx_str + '\n ' ).encode ())
119
- except IOError as exc :
120
- # This happens when there is no job directory, e.g. if job host
121
- # selection command causes an submission failure, there will be no job
122
- # directory. In this case, just send the information to the log.
123
- LOG .exception (exc )
124
+ except IOError :
125
+ # This happens when there is no job directory. E.g., if a job host
126
+ # selection command causes a submission failure, or if a waiting task
127
+ # expires before a job log directory is otherwise needed.
128
+ # (Don't log the exception content, it looks like a bug).
129
+ LOG .warning (
130
+ f"There is no log directory for { point } /{ name } job:{ submit_num } "
131
+ " so I'll just log the following activity."
132
+ )
124
133
LOG .info (ctx_str )
125
134
if ctx .cmd and ctx .ret_code :
126
135
LOG .error (ctx_str )
@@ -337,6 +346,7 @@ class TaskEventsManager():
337
346
EVENT_RETRY = "retry"
338
347
EVENT_STARTED = TASK_OUTPUT_STARTED
339
348
EVENT_SUBMITTED = TASK_OUTPUT_SUBMITTED
349
+ EVENT_EXPIRED = TASK_OUTPUT_EXPIRED
340
350
EVENT_SUBMIT_FAILED = "submission failed"
341
351
EVENT_SUBMIT_RETRY = "submission retry"
342
352
EVENT_SUCCEEDED = TASK_OUTPUT_SUCCEEDED
@@ -638,6 +648,11 @@ def process_message(
638
648
elif message == self .EVENT_SUCCEEDED :
639
649
self ._process_message_succeeded (itask , event_time )
640
650
self .spawn_children (itask , TASK_OUTPUT_SUCCEEDED )
651
+
652
+ elif message == self .EVENT_EXPIRED :
653
+ self ._process_message_expired (itask , event_time )
654
+ self .spawn_children (itask , TASK_OUTPUT_EXPIRED )
655
+
641
656
elif message == self .EVENT_FAILED :
642
657
if (
643
658
flag == self .FLAG_RECEIVED
@@ -647,6 +662,7 @@ def process_message(
647
662
if self ._process_message_failed (
648
663
itask , event_time , self .JOB_FAILED ):
649
664
self .spawn_children (itask , TASK_OUTPUT_FAILED )
665
+
650
666
elif message == self .EVENT_SUBMIT_FAILED :
651
667
if (
652
668
flag == self .FLAG_RECEIVED
@@ -659,6 +675,7 @@ def process_message(
659
675
submit_num
660
676
):
661
677
self .spawn_children (itask , TASK_OUTPUT_SUBMIT_FAILED )
678
+
662
679
elif message == self .EVENT_SUBMITTED :
663
680
if (
664
681
flag == self .FLAG_RECEIVED
@@ -1159,6 +1176,15 @@ def _process_message_started(self, itask, event_time):
1159
1176
if TimerFlags .SUBMISSION_RETRY in itask .try_timers :
1160
1177
itask .try_timers [TimerFlags .SUBMISSION_RETRY ].num = 0
1161
1178
1179
+ def _process_message_expired (self , itask , event_time ):
1180
+ """Helper for process_message, handle task expiry."""
1181
+ # state reset already done for expired
1182
+ msg = 'Task expired: will not submit job.'
1183
+ self .setup_event_handlers (itask , self .EVENT_EXPIRED , msg )
1184
+ self .data_store_mgr .delta_task_state (itask )
1185
+ # self.data_store_mgr.delta_task_held(itask) # ??
1186
+ self ._reset_job_timers (itask )
1187
+
1162
1188
def _process_message_succeeded (self , itask , event_time ):
1163
1189
"""Helper for process_message, handle a succeeded message."""
1164
1190
0 commit comments