async def main() -> None:
    crawler = BeautifulSoupCrawler(
-        # Keep the crawler alive even when there are no requests to be processed at the moment.
+        # Keep the crawler alive even when there are no requests to be processed now.
        keep_alive=True,
    )

    def stop_crawler_if_url_visited(context: BasicCrawlingContext) -> None:
-        """Stop crawler once specific url is visited. Just an example of guard condition to stop the crawler."""
+        """Stop crawler once specific url is visited.
+
+        Example of guard condition to stop the crawler."""
        if context.request.url == 'https://crawlee.dev/docs/examples':
-            crawler.stop('Stop crawler that was in keep_alive state after specific url was visited')
+            crawler.stop(
+                'Stop crawler that was in keep_alive state after specific url was visited'
+            )
        else:
            context.log.info('keep_alive=True, waiting for more requests to come.')

    async def add_request_later(url: str, after_s: int) -> None:
-        """Add requests to the queue after some time. This can be done by external code."""
-        # Just an example of request being added to the crawler later, when it is waiting due to `keep_alive=True`.
+        """Add requests to the queue after some time. Can be done by external code."""
+        # Just an example of request being added to the crawler later,
+        # when it is waiting due to `keep_alive=True`.
        await asyncio.sleep(after_s)
        await crawler.add_requests([url])
@@ -33,11 +38,15 @@ async def request_handler(context: BasicCrawlingContext) -> None:
    # Start some tasks that will add some requests later to simulate real situation,
    # where requests are added later by external code.
-    add_request_later_task1 = asyncio.create_task(add_request_later(url='https://crawlee.dev', after_s=1))
-    add_request_later_task2 = asyncio.create_task(add_request_later(url='https://crawlee.dev/docs/examples', after_s=5))
+    add_request_later_task1 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev', after_s=1)
+    )
+    add_request_later_task2 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev/docs/examples', after_s=5)
+    )

    # Run the crawler without the initial list of requests.
-    # It will wait for more requests to be added to the queue later due to `keep_alive=True`.
+    # Wait for more requests to be added to the queue later due to `keep_alive=True`.
    await crawler.run()

    await asyncio.gather(add_request_later_task1, add_request_later_task2)
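
For context, here is a sketch of the complete example after this diff is applied. The diff does not show the imports or the body of `request_handler` (its hunk header only shows the signature), so those parts are assumptions: the import paths are based on recent Crawlee for Python releases, and the handler's log line is illustrative. Because `keep_alive=True`, `crawler.run()` only returns once `crawler.stop()` is called, which is why the guard condition inside the handler is needed.

import asyncio

# Import paths are an assumption based on recent Crawlee for Python releases.
from crawlee.crawlers import BasicCrawlingContext, BeautifulSoupCrawler


async def main() -> None:
    crawler = BeautifulSoupCrawler(
        # Keep the crawler alive even when there are no requests to be processed now.
        keep_alive=True,
    )

    def stop_crawler_if_url_visited(context: BasicCrawlingContext) -> None:
        """Stop crawler once specific url is visited.

        Example of guard condition to stop the crawler."""
        if context.request.url == 'https://crawlee.dev/docs/examples':
            crawler.stop(
                'Stop crawler that was in keep_alive state after specific url was visited'
            )
        else:
            context.log.info('keep_alive=True, waiting for more requests to come.')

    async def add_request_later(url: str, after_s: int) -> None:
        """Add requests to the queue after some time. Can be done by external code."""
        # Just an example of request being added to the crawler later,
        # when it is waiting due to `keep_alive=True`.
        await asyncio.sleep(after_s)
        await crawler.add_requests([url])

    @crawler.router.default_handler
    async def request_handler(context: BasicCrawlingContext) -> None:
        # Hypothetical handler body; not shown in the diff.
        context.log.info(f'Processing {context.request.url} ...')
        stop_crawler_if_url_visited(context)

    # Start some tasks that will add some requests later to simulate real situation,
    # where requests are added later by external code.
    add_request_later_task1 = asyncio.create_task(
        add_request_later(url='https://crawlee.dev', after_s=1)
    )
    add_request_later_task2 = asyncio.create_task(
        add_request_later(url='https://crawlee.dev/docs/examples', after_s=5)
    )

    # Run the crawler without the initial list of requests.
    # Wait for more requests to be added to the queue later due to `keep_alive=True`.
    await crawler.run()

    await asyncio.gather(add_request_later_task1, add_request_later_task2)


if __name__ == '__main__':
    asyncio.run(main())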