16
16
17
17
"""Wrangle task proxies to manage the workflow."""
18
18
19
+ import re
19
20
from contextlib import suppress
20
21
from collections import Counter
21
22
import json
85
86
Pool = Dict ['PointBase' , Dict [str , TaskProxy ]]
86
87
87
88
89
+ # CLI prerequisite pattern: point/name:label
90
+ REC_CLI_PREREQ = re .compile (
91
+ rf"({ TaskID .POINT_RE } )" +
92
+ rf"{ TaskID .DELIM2 } " +
93
+ rf"({ TaskID .NAME_RE } )" +
94
+ r':' + r'(\w+)' # TODO: formally define qualifier RE?
95
+ )
96
+
97
+
88
98
class TaskPool :
89
99
"""Task pool of a workflow."""
90
100
@@ -702,7 +712,7 @@ def _get_spawned_or_merged_task(
702
712
# ntask does not exist: spawn it in the flow.
703
713
ntask = self .spawn_task (name , point , flow_nums )
704
714
else :
705
- # ntask already exists (n=0 or incomplete ): merge flows.
715
+ # ntask already exists (n=0): merge flows.
706
716
self .merge_flows (ntask , flow_nums )
707
717
return ntask # may be None
708
718
@@ -1259,7 +1269,7 @@ def spawn_on_output(self, itask, output, forced=False):
1259
1269
1260
1270
Args:
1261
1271
tasks: List of identifiers or task globs.
1262
- outputs: List of outputs to spawn on.
1272
+ output: Output to spawn on.
1263
1273
forced: If True this is a manual spawn command.
1264
1274
1265
1275
"""
@@ -1576,45 +1586,79 @@ def spawn_task(
1576
1586
self .db_add_new_flow_rows (itask )
1577
1587
return itask
1578
1588
1589
+ # TODO RENAME THIS METHOD
1579
1590
def force_spawn_children (
1580
1591
self ,
1581
1592
items : Iterable [str ],
1582
1593
outputs : List [str ],
1594
+ prerequisites : List [str ],
1583
1595
flow : List [str ],
1584
1596
flow_wait : bool = False ,
1585
- flow_descr : str = "" ,
1597
+ flow_descr : Optional [ str ] = None
1586
1598
):
1587
- """Spawn downstream children of given outputs, on user command .
1599
+ """Force set prerequistes satisfied and outputs completed .
1588
1600
1589
- User-facing command name: set_task. Creates a transient parent just
1590
- for the purpose of spawning children.
1601
+ For prerequisites:
1602
+ - spawn target task if necessary, and set the prerequisites
1603
+
1604
+ For outputs:
1605
+ - spawn child tasks if necessary, and spawn/update prereqs of
1606
+ children
1607
+ - TODO: set outputs completed in the target task (DB, and task
1608
+ proxy if already spawned - but don't spawn a new one)
1591
1609
1592
1610
Args:
1593
- items: Identifiers for matching task definitions, each with the
1594
- form "point/name".
1595
- outputs: List of outputs to spawn on
1596
- flow: Flow number to attribute the outputs
1611
+ items: Identifiers for matching task definitions
1612
+ prerequisites: prerequisites to set and spawn children of
1613
+ outputs: Outputs to set and spawn children of
1614
+ flow: Flow numbers for spawned or updated tasks
1615
+ flow_wait: wait for flows to catch up before continuing
1616
+ flow_descr: description of new flow
1597
1617
1598
1618
"""
1599
- outputs = outputs or [TASK_OUTPUT_SUCCEEDED ]
1600
- flow_nums = self ._flow_cmd_helper (flow )
1619
+ if not outputs and not prerequisites :
1620
+ # Default: set all required outputs.
1621
+ outputs = outputs or [TASK_OUTPUT_SUCCEEDED ]
1622
+
1623
+ flow_nums = self ._flow_cmd_helper (flow , flow_descr )
1601
1624
if flow_nums is None :
1602
- return
1625
+ return
1603
1626
1604
1627
n_warnings , task_items = self .match_taskdefs (items )
1605
1628
for (_ , point ), taskdef in sorted (task_items .items ()):
1606
- # This the parent task:
1607
- itask = TaskProxy (
1608
- self .tokens ,
1609
- taskdef ,
1610
- point ,
1611
- flow_nums = flow_nums ,
1629
+
1630
+ itask = self ._get_spawned_or_merged_task (
1631
+ point , taskdef .name , flow_nums
1612
1632
)
1613
- # Spawn children of selected outputs.
1614
- for trig , out , _ in itask .state .outputs .get_all ():
1615
- if trig in outputs :
1616
- LOG .info (f"[{ itask } ] Forced spawning on { out } " )
1617
- self .spawn_on_output (itask , out , forced = True )
1633
+ if itask is None :
1634
+ # Not in pool but was spawned already in this flow.
1635
+ return
1636
+
1637
+ if outputs :
1638
+ # Spawn children of outputs, add them to the pool.
1639
+ # (Don't add the target task to pool if we just spawned it)
1640
+ for trig , out , _ in itask .state .outputs .get_all ():
1641
+ if trig in outputs :
1642
+ LOG .info (f"[{ itask } ] Forced spawning on { out } " )
1643
+ self .spawn_on_output (itask , out , forced = True )
1644
+ self .workflow_db_mgr .put_update_task_outputs (itask )
1645
+
1646
+ if prerequisites :
1647
+ for pre in prerequisites :
1648
+ m = REC_CLI_PREREQ .match (pre )
1649
+ if m is not None :
1650
+ itask .state .satisfy_me ({m .groups ()})
1651
+ else :
1652
+ # TODO warn here? (checked on CLI)
1653
+ continue
1654
+
1655
+ self .data_store_mgr .delta_task_prerequisite (itask )
1656
+ self .add_to_pool (itask ) # move from hidden if necessary
1657
+ if (
1658
+ self .runahead_limit_point is not None
1659
+ and itask .point <= self .runahead_limit_point
1660
+ ):
1661
+ self .rh_release_and_queue (itask )
1618
1662
1619
1663
def _get_active_flow_nums (self ) -> Set [int ]:
1620
1664
"""Return all active, or most recent previous, flow numbers.
@@ -1639,8 +1683,12 @@ def remove_tasks(self, items):
1639
1683
self .release_runahead_tasks ()
1640
1684
return len (bad_items )
1641
1685
1642
- def _flow_cmd_helper (self , flow ):
1643
- # TODO type hints
1686
+ def _flow_cmd_helper (
1687
+ self ,
1688
+ flow : List [str ],
1689
+ flow_descr : Optional [str ]
1690
+ ) -> Optional [Set [int ]]:
1691
+ """TODO"""
1644
1692
if set (flow ).intersection ({FLOW_ALL , FLOW_NEW , FLOW_NONE }):
1645
1693
if len (flow ) != 1 :
1646
1694
LOG .warning (
@@ -1669,7 +1717,7 @@ def force_trigger_tasks(
1669
1717
flow : List [str ],
1670
1718
flow_wait : bool = False ,
1671
1719
flow_descr : Optional [str ] = None
1672
- ) -> int :
1720
+ ):
1673
1721
"""Manual task triggering.
1674
1722
1675
1723
Don't get a new flow number for existing n=0 tasks (e.g. incomplete
@@ -1678,9 +1726,9 @@ def force_trigger_tasks(
1678
1726
Queue the task if not queued, otherwise release it to run.
1679
1727
1680
1728
"""
1681
- flow_nums = self ._flow_cmd_helper (flow )
1729
+ flow_nums = self ._flow_cmd_helper (flow , flow_descr )
1682
1730
if flow_nums is None :
1683
- return
1731
+ return
1684
1732
1685
1733
# n_warnings, task_items = self.match_taskdefs(items)
1686
1734
itasks , future_tasks , unmatched = self .filter_task_proxies (
@@ -1729,8 +1777,6 @@ def force_trigger_tasks(
1729
1777
# De-queue it to run now.
1730
1778
self .task_queue_mgr .force_release_task (itask )
1731
1779
1732
- return len (unmatched )
1733
-
1734
1780
def sim_time_check (self , message_queue : 'Queue[TaskMsg]' ) -> bool :
1735
1781
"""Simulation mode: simulate task run times and set states."""
1736
1782
if not self .config .run_mode ('simulation' ):
0 commit comments