Skip to content

Commit 4428715

Browse files
authored
Merge pull request #732 from Zsailer/pending-state
Further improvements to pending kernels management
2 parents 659330f + a382498 commit 4428715

File tree

6 files changed

+204
-54
lines changed

6 files changed

+204
-54
lines changed

.github/workflows/downstream.yml

-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ jobs:
1212
strategy:
1313
matrix:
1414
python-version: ["3.9"]
15-
1615
steps:
1716
- name: Checkout
1817
uses: actions/checkout@v2

docs/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ with Jupyter kernels.
2424
kernels
2525
wrapperkernels
2626
provisioning
27+
pending-kernels
2728

2829
.. toctree::
2930
:maxdepth: 2

docs/pending-kernels.rst

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
Pending Kernels
2+
===============
3+
4+
*Added in 7.1.0*
5+
6+
In scenarios where a kernel takes a long time to start (e.g. kernels running remotely), it can be advantageous to immediately return the kernel's model and ID from key methods like ``.start_kernel()`` and ``.shutdown_kernel()``. The kernel will continue its task without blocking other managerial actions.
7+
8+
This intermediate state is called a **"pending kernel"**.
9+
10+
How they work
11+
-------------
12+
13+
When ``.start_kernel()`` or ``.shutdown_kernel()`` is called, a ``Future`` is created under the ``KernelManager.ready`` property. This property can be awaited anytime to ensure that the kernel moves out of its pending state, e.g.:
14+
15+
.. code-block:: python
16+
17+
# await a Kernel Manager's `.ready` property to
18+
# block further action until the kernel is out
19+
# of its pending state.
20+
await kernel_manager.ready
21+
22+
Once the kernel is finished pending, ``.ready.done()`` will be ``True`` and either 1) ``.ready.result()`` will return ``None`` or 2) ``.ready.exception()`` will return the raised exception.
23+
24+
Using pending kernels
25+
---------------------
26+
27+
The most common way to interact with pending kernels is through the ``MultiKernelManager``—the object that manages a collection of kernels—by setting its ``use_pending_kernels`` trait to ``True``. Pending kernels are "opt-in"; they are not used by default in the ``MultiKernelManager``.
28+
29+
When ``use_pending_kernels`` is ``True``, the following changes are made to the ``MultiKernelManager``:
30+
31+
1. ``start_kernel`` and ``shutdown_kernel`` return immediately while the pending work continues as a background asyncio task.
32+
2. The following methods raise a ``RuntimeError`` if a kernel is pending:
33+
* ``restart_kernel``
34+
* ``interrupt_kernel``
35+
* ``shutdown_kernel``
36+
3. ``shutdown_all`` will wait for all pending kernels to become ready before attempting to shut them down.

jupyter_client/manager.py

+38-25
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# Copyright (c) Jupyter Development Team.
33
# Distributed under the terms of the Modified BSD License.
44
import asyncio
5+
import functools
56
import os
67
import re
78
import signal
@@ -51,6 +52,35 @@ class _ShutdownStatus(Enum):
5152
SigkillRequest = "SigkillRequest"
5253

5354

55+
def in_pending_state(method):
56+
"""Sets the kernel to a pending state by
57+
creating a fresh Future for the KernelManager's `ready`
58+
attribute. Once the method is finished, set the Future's results.
59+
"""
60+
61+
@functools.wraps(method)
62+
async def wrapper(self, *args, **kwargs):
63+
# Create a future for the decorated method
64+
try:
65+
self._ready = Future()
66+
except RuntimeError:
67+
# No event loop running, use concurrent future
68+
self._ready = CFuture()
69+
try:
70+
# call wrapped method, await, and set the result or exception.
71+
out = await method(self, *args, **kwargs)
72+
# Add a small sleep to ensure tests can capture the state before done
73+
await asyncio.sleep(0.01)
74+
self._ready.set_result(None)
75+
return out
76+
except Exception as e:
77+
self._ready.set_exception(e)
78+
self.log.exception(self._ready.exception())
79+
raise e
80+
81+
return wrapper
82+
83+
5484
class KernelManager(ConnectionFileMixin):
5585
"""Manages a single kernel in a subprocess on this host.
5686
@@ -60,6 +90,7 @@ class KernelManager(ConnectionFileMixin):
6090
def __init__(self, *args, **kwargs):
6191
super().__init__(**kwargs)
6292
self._shutdown_status = _ShutdownStatus.Unset
93+
# Create a place holder future.
6394
try:
6495
self._ready = Future()
6596
except RuntimeError:
@@ -329,6 +360,7 @@ async def _async_post_start_kernel(self, **kw) -> None:
329360

330361
post_start_kernel = run_sync(_async_post_start_kernel)
331362

363+
@in_pending_state
332364
async def _async_start_kernel(self, **kw):
333365
"""Starts a kernel on this host in a separate process.
334366
@@ -341,25 +373,12 @@ async def _async_start_kernel(self, **kw):
341373
keyword arguments that are passed down to build the kernel_cmd
342374
and launching the kernel (e.g. Popen kwargs).
343375
"""
344-
done = self._ready.done()
345-
346-
try:
347-
kernel_cmd, kw = await ensure_async(self.pre_start_kernel(**kw))
348-
349-
# launch the kernel subprocess
350-
self.log.debug("Starting kernel: %s", kernel_cmd)
351-
await ensure_async(self._launch_kernel(kernel_cmd, **kw))
352-
await ensure_async(self.post_start_kernel(**kw))
353-
if not done:
354-
# Add a small sleep to ensure tests can capture the state before done
355-
await asyncio.sleep(0.01)
356-
self._ready.set_result(None)
376+
kernel_cmd, kw = await ensure_async(self.pre_start_kernel(**kw))
357377

358-
except Exception as e:
359-
if not done:
360-
self._ready.set_exception(e)
361-
self.log.exception(self._ready.exception())
362-
raise e
378+
# launch the kernel subprocess
379+
self.log.debug("Starting kernel: %s", kernel_cmd)
380+
await ensure_async(self._launch_kernel(kernel_cmd, **kw))
381+
await ensure_async(self.post_start_kernel(**kw))
363382

364383
start_kernel = run_sync(_async_start_kernel)
365384

@@ -434,6 +453,7 @@ async def _async_cleanup_resources(self, restart: bool = False) -> None:
434453

435454
cleanup_resources = run_sync(_async_cleanup_resources)
436455

456+
@in_pending_state
437457
async def _async_shutdown_kernel(self, now: bool = False, restart: bool = False):
438458
"""Attempts to stop the kernel process cleanly.
439459
@@ -452,10 +472,6 @@ async def _async_shutdown_kernel(self, now: bool = False, restart: bool = False)
452472
Will this kernel be restarted after it is shutdown. When this
453473
is True, connection files will not be cleaned up.
454474
"""
455-
# Shutdown is a no-op for a kernel that had a failed startup
456-
if self._ready.exception():
457-
return
458-
459475
self.shutting_down = True # Used by restarter to prevent race condition
460476
# Stop monitoring for restarting while we shutdown.
461477
self.stop_restarter()
@@ -503,9 +519,6 @@ async def _async_restart_kernel(self, now: bool = False, newports: bool = False,
503519
if self._launch_args is None:
504520
raise RuntimeError("Cannot restart the kernel. " "No previous call to 'start_kernel'.")
505521

506-
if not self._ready.done():
507-
raise RuntimeError("Cannot restart the kernel. " "Kernel has not fully started.")
508-
509522
# Stop currently running kernel.
510523
await ensure_async(self.shutdown_kernel(now=now, restart=True))
511524

jupyter_client/multikernelmanager.py

+88-14
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,12 @@ def create_kernel_manager(*args, **kwargs) -> KernelManager:
9797

9898
context = Instance("zmq.Context")
9999

100-
_starting_kernels = Dict()
100+
_pending_kernels = Dict()
101+
102+
@property
103+
def _starting_kernels(self):
104+
"""A shim for backwards compatibility."""
105+
return self._pending_kernels
101106

102107
@default("context")
103108
def _context_default(self) -> zmq.Context:
@@ -165,7 +170,22 @@ async def _add_kernel_when_ready(
165170
await kernel_awaitable
166171
self._kernels[kernel_id] = km
167172
finally:
168-
self._starting_kernels.pop(kernel_id, None)
173+
self._pending_kernels.pop(kernel_id, None)
174+
175+
async def _remove_kernel_when_ready(
176+
self, kernel_id: str, kernel_awaitable: t.Awaitable
177+
) -> None:
178+
try:
179+
await kernel_awaitable
180+
self.remove_kernel(kernel_id)
181+
finally:
182+
self._pending_kernels.pop(kernel_id, None)
183+
184+
def _using_pending_kernels(self):
185+
"""Returns a boolean; a clearer method for determining if
186+
this multikernelmanager is using pending kernels or not
187+
"""
188+
return getattr(self, 'use_pending_kernels', False)
169189

170190
async def _async_start_kernel(self, kernel_name: t.Optional[str] = None, **kwargs) -> str:
171191
"""Start a new kernel.
@@ -186,17 +206,38 @@ async def _async_start_kernel(self, kernel_name: t.Optional[str] = None, **kwarg
186206

187207
starter = ensure_async(km.start_kernel(**kwargs))
188208
fut = asyncio.ensure_future(self._add_kernel_when_ready(kernel_id, km, starter))
189-
self._starting_kernels[kernel_id] = fut
190-
191-
if getattr(self, 'use_pending_kernels', False):
209+
self._pending_kernels[kernel_id] = fut
210+
# Handling a Pending Kernel
211+
if self._using_pending_kernels():
212+
# If using pending kernels, do not block
213+
# on the kernel start.
192214
self._kernels[kernel_id] = km
193215
else:
194216
await fut
217+
# raise an exception if one occurred during kernel startup.
218+
if km.ready.exception():
219+
raise km.ready.exception() # type: ignore
195220

196221
return kernel_id
197222

198223
start_kernel = run_sync(_async_start_kernel)
199224

225+
async def _shutdown_kernel_when_ready(
226+
self,
227+
kernel_id: str,
228+
now: t.Optional[bool] = False,
229+
restart: t.Optional[bool] = False,
230+
) -> None:
231+
"""Wait for a pending kernel to be ready
232+
before shutting the kernel down.
233+
"""
234+
# Only do this if using pending kernels
235+
if self._using_pending_kernels():
236+
kernel = self._kernels[kernel_id]
237+
await kernel.ready
238+
# Once out of a pending state, we can call shutdown.
239+
await ensure_async(self.shutdown_kernel(kernel_id, now=now, restart=restart))
240+
200241
async def _async_shutdown_kernel(
201242
self,
202243
kernel_id: str,
@@ -215,15 +256,31 @@ async def _async_shutdown_kernel(
215256
Will the kernel be restarted?
216257
"""
217258
self.log.info("Kernel shutdown: %s" % kernel_id)
218-
if kernel_id in self._starting_kernels:
259+
# If we're using pending kernels, block shutdown when a kernel is pending.
260+
if self._using_pending_kernels() and kernel_id in self._pending_kernels:
261+
raise RuntimeError("Kernel is in a pending state. Cannot shutdown.")
262+
# If the kernel is still starting, wait for it to be ready.
263+
elif kernel_id in self._starting_kernels:
264+
kernel = self._starting_kernels[kernel_id]
219265
try:
220-
await self._starting_kernels[kernel_id]
266+
await kernel
221267
except Exception:
222268
self.remove_kernel(kernel_id)
223269
return
224270
km = self.get_kernel(kernel_id)
225-
await ensure_async(km.shutdown_kernel(now, restart))
226-
self.remove_kernel(kernel_id)
271+
# If a pending kernel raised an exception, remove it.
272+
if km.ready.exception():
273+
self.remove_kernel(kernel_id)
274+
return
275+
stopper = ensure_async(km.shutdown_kernel(now, restart))
276+
fut = asyncio.ensure_future(self._remove_kernel_when_ready(kernel_id, stopper))
277+
self._pending_kernels[kernel_id] = fut
278+
# Await the kernel if not using pending kernels.
279+
if not self._using_pending_kernels():
280+
await fut
281+
# raise an exception if one occurred during kernel shutdown.
282+
if km.ready.exception():
283+
raise km.ready.exception() # type: ignore
227284

228285
shutdown_kernel = run_sync(_async_shutdown_kernel)
229286

@@ -258,13 +315,17 @@ def remove_kernel(self, kernel_id: str) -> KernelManager:
258315
async def _async_shutdown_all(self, now: bool = False) -> None:
259316
"""Shutdown all kernels."""
260317
kids = self.list_kernel_ids()
261-
kids += list(self._starting_kernels)
262-
futs = [ensure_async(self.shutdown_kernel(kid, now=now)) for kid in set(kids)]
318+
kids += list(self._pending_kernels)
319+
futs = [ensure_async(self._shutdown_kernel_when_ready(kid, now=now)) for kid in set(kids)]
263320
await asyncio.gather(*futs)
321+
# When using "shutdown all", all pending kernels
322+
# should be awaited before exiting this method.
323+
if self._using_pending_kernels():
324+
for km in self._kernels.values():
325+
await km.ready
264326

265327
shutdown_all = run_sync(_async_shutdown_all)
266328

267-
@kernel_method
268329
def interrupt_kernel(self, kernel_id: str) -> None:
269330
"""Interrupt (SIGINT) the kernel by its uuid.
270331
@@ -273,7 +334,12 @@ def interrupt_kernel(self, kernel_id: str) -> None:
273334
kernel_id : uuid
274335
The id of the kernel to interrupt.
275336
"""
337+
kernel = self.get_kernel(kernel_id)
338+
if not kernel.ready.done():
339+
raise RuntimeError("Kernel is in a pending state. Cannot interrupt.")
340+
out = kernel.interrupt_kernel()
276341
self.log.info("Kernel interrupted: %s" % kernel_id)
342+
return out
277343

278344
@kernel_method
279345
def signal_kernel(self, kernel_id: str, signum: int) -> None:
@@ -291,8 +357,7 @@ def signal_kernel(self, kernel_id: str, signum: int) -> None:
291357
"""
292358
self.log.info("Signaled Kernel %s with %s" % (kernel_id, signum))
293359

294-
@kernel_method
295-
def restart_kernel(self, kernel_id: str, now: bool = False) -> None:
360+
async def _async_restart_kernel(self, kernel_id: str, now: bool = False) -> None:
296361
"""Restart a kernel by its uuid, keeping the same ports.
297362
298363
Parameters
@@ -307,7 +372,15 @@ def restart_kernel(self, kernel_id: str, now: bool = False) -> None:
307372
In all cases the kernel is restarted, the only difference is whether
308373
it is given a chance to perform a clean shutdown or not.
309374
"""
375+
kernel = self.get_kernel(kernel_id)
376+
if self._using_pending_kernels():
377+
if not kernel.ready.done():
378+
raise RuntimeError("Kernel is in a pending state. Cannot restart.")
379+
out = await ensure_async(kernel.restart_kernel(now=now))
310380
self.log.info("Kernel restarted: %s" % kernel_id)
381+
return out
382+
383+
restart_kernel = run_sync(_async_restart_kernel)
311384

312385
@kernel_method
313386
def is_alive(self, kernel_id: str) -> bool:
@@ -475,5 +548,6 @@ class AsyncMultiKernelManager(MultiKernelManager):
475548
).tag(config=True)
476549

477550
start_kernel = MultiKernelManager._async_start_kernel
551+
restart_kernel = MultiKernelManager._async_restart_kernel
478552
shutdown_kernel = MultiKernelManager._async_shutdown_kernel
479553
shutdown_all = MultiKernelManager._async_shutdown_all

0 commit comments

Comments
 (0)