
Commit a2e8496

docs: Set line length to docs related code to 90 (#973)
### Description

Set the line length for docs-related code to 90 so that each code example is fully visible without the need to use a slider. Update the existing examples to be compliant.

### Issues

- Closes: #970
1 parent c33b34d commit a2e8496
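Most of the change is mechanical: statements longer than 90 characters are wrapped. A representative before/after, mirroring the import split in `docs/examples/code/beautifulsoup_crawler.py` below (shown here only to illustrate the pattern):

```python
# Before: a single import statement longer than 90 characters.
from crawlee.crawlers import BasicCrawlingContext, BeautifulSoupCrawler, BeautifulSoupCrawlingContext

# After: the same import wrapped so that every line fits within 90 characters.
from crawlee.crawlers import (
    BasicCrawlingContext,
    BeautifulSoupCrawler,
    BeautifulSoupCrawlingContext,
)
```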

25 files changed, +146 -53 lines changed

docs/examples/code/adaptive_playwright_crawler.py

+8 -4

@@ -15,7 +15,9 @@ async def main() -> None:
     )
 
     @crawler.router.handler(label='label')
-    async def request_handler_for_label(context: AdaptivePlaywrightCrawlingContext) -> None:
+    async def request_handler_for_label(
+        context: AdaptivePlaywrightCrawlingContext,
+    ) -> None:
         # Do some processing using `page`
         some_locator = context.page.locator('div').first
         await some_locator.wait_for()
@@ -35,8 +37,8 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
     @crawler.pre_navigation_hook
     async def hook(context: AdaptivePlaywrightPreNavCrawlingContext) -> None:
         """Hook executed both in static sub crawler and playwright sub crawler."""
-        # Trying to access context.page in this hook would raise `AdaptiveContextError` for pages crawled
-        # without playwright.
+        # Trying to access context.page in this hook would raise `AdaptiveContextError`
+        # for pages crawled without playwright.
         context.log.info(f'pre navigation hook for: {context.request.url} ...')
 
     @crawler.pre_navigation_hook(playwright_only=True)
@@ -47,7 +49,9 @@ async def some_routing_function(route: Route) -> None:
             await route.continue_()
 
         await context.page.route('*/**', some_routing_function)
-        context.log.info(f'Playwright only pre navigation hook for: {context.request.url} ...')
+        context.log.info(
+            f'Playwright only pre navigation hook for: {context.request.url} ...'
+        )
 
     # Run the crawler with the initial list of URLs.
     await crawler.run(['https://warehouse-theme-metal.myshopify.com/'])

docs/examples/code/beautifulsoup_crawler.py

+5 -1

@@ -1,7 +1,11 @@
 import asyncio
 from datetime import timedelta
 
-from crawlee.crawlers import BasicCrawlingContext, BeautifulSoupCrawler, BeautifulSoupCrawlingContext
+from crawlee.crawlers import (
+    BasicCrawlingContext,
+    BeautifulSoupCrawler,
+    BeautifulSoupCrawlingContext,
+)
 
 
 async def main() -> None:

docs/examples/code/beautifulsoup_crawler_keep_alive.py

+17 -8

@@ -6,20 +6,25 @@
 
 async def main() -> None:
     crawler = BeautifulSoupCrawler(
-        # Keep the crawler alive even when there are no requests to be processed at the moment.
+        # Keep the crawler alive even when there are no requests to be processed now.
         keep_alive=True,
     )
 
     def stop_crawler_if_url_visited(context: BasicCrawlingContext) -> None:
-        """Stop crawler once specific url is visited. Just an example of guard condition to stop the crawler."""
+        """Stop crawler once specific url is visited.
+
+        Example of guard condition to stop the crawler."""
         if context.request.url == 'https://crawlee.dev/docs/examples':
-            crawler.stop('Stop crawler that was in keep_alive state after specific url was visited')
+            crawler.stop(
+                'Stop crawler that was in keep_alive state after specific url was visited'
+            )
         else:
             context.log.info('keep_alive=True, waiting for more requests to come.')
 
     async def add_request_later(url: str, after_s: int) -> None:
-        """Add requests to the queue after some time. This can be done by external code."""
-        # Just an example of request being added to the crawler later, when it is waiting due to `keep_alive=True`.
+        """Add requests to the queue after some time. Can be done by external code."""
+        # Just an example of request being added to the crawler later,
+        # when it is waiting due to `keep_alive=True`.
         await asyncio.sleep(after_s)
         await crawler.add_requests([url])
 
@@ -33,11 +38,15 @@ async def request_handler(context: BasicCrawlingContext) -> None:
 
     # Start some tasks that will add some requests later to simulate real situation,
     # where requests are added later by external code.
-    add_request_later_task1 = asyncio.create_task(add_request_later(url='https://crawlee.dev', after_s=1))
-    add_request_later_task2 = asyncio.create_task(add_request_later(url='https://crawlee.dev/docs/examples', after_s=5))
+    add_request_later_task1 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev', after_s=1)
+    )
+    add_request_later_task2 = asyncio.create_task(
+        add_request_later(url='https://crawlee.dev/docs/examples', after_s=5)
+    )
 
     # Run the crawler without the initial list of requests.
-    # It will wait for more requests to be added to the queue later due to `keep_alive=True`.
+    # Wait for more requests to be added to the queue later due to `keep_alive=True`.
     await crawler.run()
 
     await asyncio.gather(add_request_later_task1, add_request_later_task2)

docs/examples/code/beautifulsoup_crawler_stop.py

+3 -1

@@ -20,7 +20,9 @@ async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
 
         # Create custom condition to stop crawler once it finds what it is looking for.
         if 'crawlee' in context.request.url:
-            crawler.stop(reason='Manual stop of crawler after finding `crawlee` in the url.')
+            crawler.stop(
+                reason='Manual stop of crawler after finding `crawlee` in the url.'
+            )
 
         # Extract data from the page.
         data = {

docs/examples/code/crawl_website_with_relative_links_all_links.py

+2 -2

@@ -15,8 +15,8 @@ async def main() -> None:
     async def request_handler(context: BeautifulSoupCrawlingContext) -> None:
         context.log.info(f'Processing {context.request.url} ...')
 
-        # Enqueue all links found on the page. Any URLs found will be matched by this strategy,
-        # even if they go off the site you are currently crawling.
+        # Enqueue all links found on the page. Any URLs found will be matched by
+        # this strategy, even if they go off the site you are currently crawling.
         await context.enqueue_links(strategy=EnqueueStrategy.ALL)
 
     # Run the crawler with the initial list of requests.

docs/examples/code/playwright_block_requests.py

+5 -1

@@ -1,6 +1,10 @@
 import asyncio
 
-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
+from crawlee.crawlers import (
+    PlaywrightCrawler,
+    PlaywrightCrawlingContext,
+    PlaywrightPreNavCrawlingContext,
+)
 
 
 async def main() -> None:

docs/examples/code/playwright_crawler.py

+5 -1

@@ -1,6 +1,10 @@
 import asyncio
 
-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
+from crawlee.crawlers import (
+    PlaywrightCrawler,
+    PlaywrightCrawlingContext,
+    PlaywrightPreNavCrawlingContext,
+)
 
 
 async def main() -> None:

docs/examples/code/playwright_crawler_with_camoufox.py

+16 -7

@@ -4,31 +4,40 @@
 from camoufox import AsyncNewBrowser
 from typing_extensions import override
 
-from crawlee.browsers import BrowserPool, PlaywrightBrowserController, PlaywrightBrowserPlugin
+from crawlee.browsers import (
+    BrowserPool,
+    PlaywrightBrowserController,
+    PlaywrightBrowserPlugin,
+)
 from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
 
 
 class CamoufoxPlugin(PlaywrightBrowserPlugin):
-    """Example browser plugin that uses Camoufox browser, but otherwise keeps the functionality of
-    PlaywrightBrowserPlugin."""
+    """Example browser plugin that uses Camoufox browser,
+    but otherwise keeps the functionality of PlaywrightBrowserPlugin.
+    """
 
     @override
     async def new_browser(self) -> PlaywrightBrowserController:
         if not self._playwright:
             raise RuntimeError('Playwright browser plugin is not initialized.')
 
         return PlaywrightBrowserController(
-            browser=await AsyncNewBrowser(self._playwright, **self._browser_launch_options),
-            max_open_pages_per_browser=1,  # Increase, if camoufox can handle it in your use case.
-            header_generator=None,  # This turns off the crawlee header_generation. Camoufox has its own.
+            browser=await AsyncNewBrowser(
+                self._playwright, **self._browser_launch_options
+            ),
+            # Increase, if camoufox can handle it in your use case.
+            max_open_pages_per_browser=1,
+            # This turns off the crawlee header_generation. Camoufox has its own.
+            header_generator=None,
         )
 
 
 async def main() -> None:
     crawler = PlaywrightCrawler(
         # Limit the crawl to max requests. Remove or increase it for crawling all links.
         max_requests_per_crawl=10,
-        # Custom browser pool. This gives users full control over browsers used by the crawler.
+        # Custom browser pool. Gives users full control over browsers used by the crawler.
         browser_pool=BrowserPool(plugins=[CamoufoxPlugin()]),
     )
 
docs/examples/code/playwright_crawler_with_fingerprint_generator.py

+6 -2

@@ -1,12 +1,16 @@
 import asyncio
 
 from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
-from crawlee.fingerprint_suite import DefaultFingerprintGenerator, HeaderGeneratorOptions, ScreenOptions
+from crawlee.fingerprint_suite import (
+    DefaultFingerprintGenerator,
+    HeaderGeneratorOptions,
+    ScreenOptions,
+)
 
 
 async def main() -> None:
     # Use default fingerprint generator with desired fingerprint options.
-    # Generator will try to generate real looking browser fingerprint based on the options.
+    # Generator will generate real looking browser fingerprint based on the options.
     # Unspecified fingerprint options will be automatically selected by the generator.
     fingerprint_generator = DefaultFingerprintGenerator(
         header_options=HeaderGeneratorOptions(browsers=['chromium']),

docs/guides/code/playwright_crawler/browser_configuration_example.py

+2 -1

@@ -9,7 +9,8 @@ async def main() -> None:
         browser_type='chromium',
         # Browser launch options
         browser_launch_options={
-            # For support `msedge` channel you need to install it `playwright install msedge`
+            # For support `msedge` channel you need to install it
+            # `playwright install msedge`
             'channel': 'msedge',
             'slow_mo': 200,
         },

docs/guides/code/playwright_crawler/multiple_launch_example.py

+11 -3

@@ -6,8 +6,12 @@
 
 async def main() -> None:
     # Create a plugin for each required browser.
-    plugin_chromium = PlaywrightBrowserPlugin(browser_type='chromium', max_open_pages_per_browser=1)
-    plugin_firefox = PlaywrightBrowserPlugin(browser_type='firefox', max_open_pages_per_browser=1)
+    plugin_chromium = PlaywrightBrowserPlugin(
+        browser_type='chromium', max_open_pages_per_browser=1
+    )
+    plugin_firefox = PlaywrightBrowserPlugin(
+        browser_type='firefox', max_open_pages_per_browser=1
+    )
 
     crawler = PlaywrightCrawler(
         browser_pool=BrowserPool(plugins=[plugin_chromium, plugin_firefox]),
@@ -17,7 +21,11 @@ async def main() -> None:
 
     @crawler.router.default_handler
     async def request_handler(context: PlaywrightCrawlingContext) -> None:
-        browser_name = context.page.context.browser.browser_type.name if context.page.context.browser else 'undefined'
+        browser_name = (
+            context.page.context.browser.browser_type.name
+            if context.page.context.browser
+            else 'undefined'
+        )
         context.log.info(f'Processing {context.request.url} with {browser_name} ...')
 
         await context.enqueue_links()

docs/guides/code/playwright_crawler/pre_navigation_hook_example.py

+5 -1

@@ -1,6 +1,10 @@
 import asyncio
 
-from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavCrawlingContext
+from crawlee.crawlers import (
+    PlaywrightCrawler,
+    PlaywrightCrawlingContext,
+    PlaywrightPreNavCrawlingContext,
+)
 
 
 async def main() -> None:

docs/guides/code/proxy_management/tiers_bs_example.py

+10 -3

@@ -8,12 +8,19 @@ async def main() -> None:
     # Create a ProxyConfiguration object and pass it to the crawler.
     proxy_configuration = ProxyConfiguration(
         tiered_proxy_urls=[
-            # No proxy tier. (Not needed, but optional in case you do not want to use any proxy on lowest tier.)
+            # No proxy tier.
+            # Optional in case you do not want to use any proxy on lowest tier.
             [None],
             # lower tier, cheaper, preferred as long as they work
-            ['http://cheap-datacenter-proxy-1.com/', 'http://cheap-datacenter-proxy-2.com/'],
+            [
+                'http://cheap-datacenter-proxy-1.com/',
+                'http://cheap-datacenter-proxy-2.com/',
+            ],
             # higher tier, more expensive, used as a fallback
-            ['http://expensive-residential-proxy-1.com/', 'http://expensive-residential-proxy-2.com/'],
+            [
+                'http://expensive-residential-proxy-1.com/',
+                'http://expensive-residential-proxy-2.com/',
+            ],
         ]
     )
     crawler = BeautifulSoupCrawler(proxy_configuration=proxy_configuration)

docs/guides/code/proxy_management/tiers_pw_example.py

+10 -3

@@ -8,12 +8,19 @@ async def main() -> None:
     # Create a ProxyConfiguration object and pass it to the crawler.
     proxy_configuration = ProxyConfiguration(
         tiered_proxy_urls=[
-            # No proxy tier. (Not needed, but optional in case you do not want to use any proxy on lowest tier.)
+            # No proxy tier.
+            # Optional in case you do not want to use any proxy on lowest tier.
            [None],
             # lower tier, cheaper, preferred as long as they work
-            ['http://cheap-datacenter-proxy-1.com/', 'http://cheap-datacenter-proxy-2.com/'],
+            [
+                'http://cheap-datacenter-proxy-1.com/',
+                'http://cheap-datacenter-proxy-2.com/',
+            ],
             # higher tier, more expensive, used as a fallback
-            ['http://expensive-residential-proxy-1.com/', 'http://expensive-residential-proxy-2.com/'],
+            [
+                'http://expensive-residential-proxy-1.com/',
+                'http://expensive-residential-proxy-2.com/',
+            ],
         ]
     )
     crawler = PlaywrightCrawler(proxy_configuration=proxy_configuration)

docs/guides/code/storages/rq_basic_example.py

+3 -1

@@ -12,7 +12,9 @@ async def main() -> None:
     await request_queue.add_request('https://apify.com/')
 
     # Add multiple requests as a batch.
-    await request_queue.add_requests_batched(['https://crawlee.dev/', 'https://crawlee.dev/python/'])
+    await request_queue.add_requests_batched(
+        ['https://crawlee.dev/', 'https://crawlee.dev/python/']
+    )
 
     # Fetch and process requests from the queue.
     while request := await request_queue.fetch_next_request():

docs/guides/code/storages/rq_with_crawler_example.py

+2 -2

@@ -4,8 +4,8 @@
 
 
 async def main() -> None:
-    # Create a new crawler (it can be any subclass of BasicCrawler). Request queue is a default
-    # request manager, it will be opened, and fully managed if not specified.
+    # Create a new crawler (it can be any subclass of BasicCrawler). Request queue is
+    # a default request manager, it will be opened, and fully managed if not specified.
     crawler = HttpCrawler()
 
     # Define the default request handler, which will be called for every request.

docs/guides/code/storages/rq_with_crawler_explicit_example.py

+3 -1

@@ -10,7 +10,9 @@ async def main() -> None:
     request_queue = await RequestQueue.open(name='my-request-queue')
 
     # Interact with the request queue directly, e.g. add a batch of requests.
-    await request_queue.add_requests_batched(['https://apify.com/', 'https://crawlee.dev/'])
+    await request_queue.add_requests_batched(
+        ['https://apify.com/', 'https://crawlee.dev/']
+    )
 
     # Create a new crawler (it can be any subclass of BasicCrawler) and pass the request
     # list as request manager to it. It will be managed by the crawler.

docs/introduction/code/03_transform_request.py

+3 -1

@@ -4,7 +4,9 @@
 from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
 
 
-def transform_request(request_options: RequestOptions) -> RequestOptions | RequestTransformAction:
+def transform_request(
+    request_options: RequestOptions,
+) -> RequestOptions | RequestTransformAction:
     # Skip requests to PDF files
     if request_options['url'].endswith('.pdf'):
         return 'skip'

docs/introduction/code/04_sanity_check.py

+3 -2

@@ -13,8 +13,9 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
         # the elements we want to interact with are present in the DOM.
         await context.page.wait_for_selector('.collection-block-item')
 
-        # Execute a function within the browser context to target the collection card elements
-        # and extract their text content, trimming any leading or trailing whitespace.
+        # Execute a function within the browser context to target the collection
+        # card elements and extract their text content, trimming any leading or
+        # trailing whitespace.
         category_texts = await context.page.eval_on_selector_all(
             '.collection-block-item',
             '(els) => els.map(el => el.textContent.trim())',

docs/introduction/code/06_scraping.py

+3 -1

@@ -23,7 +23,9 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
         title = await context.page.locator('.product-meta h1').text_content()
 
         # Extract the SKU using its selector.
-        sku = await context.page.locator('span.product-meta__sku-number').text_content()
+        sku = await context.page.locator(
+            'span.product-meta__sku-number'
+        ).text_content()
 
         # Locate the price element that contains the '$' sign and filter out
         # the visually hidden elements.

docs/introduction/code/07_final_code.py

+3 -1

@@ -23,7 +23,9 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
         title = await context.page.locator('.product-meta h1').text_content()
 
         # Extract the SKU using its selector.
-        sku = await context.page.locator('span.product-meta__sku-number').text_content()
+        sku = await context.page.locator(
+            'span.product-meta__sku-number'
+        ).text_content()
 
         # Locate the price element that contains the '$' sign and filter out
         # the visually hidden elements.

docs/pyproject.toml

+9

@@ -0,0 +1,9 @@
+# Line length different from the rest of the code to make sure that the example codes visualised on the generated
+# documentation webpages are shown without vertical slider to make them more readable.
+
+[tool.ruff]
+# Inherit all from project top configuration file.
+extend = "../pyproject.toml"
+
+# Override just line length
+line-length = 90 # Maximum possible fit to the doc webpage. Longer lines need slider.
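Ruff applies the nearest configuration file it finds for each checked path, so examples under `docs/` pick up this 90-character limit while the rest of the repository keeps the top-level setting. As a rough sanity check (a hypothetical helper, not part of this commit), over-long example lines can also be listed with a few lines of standard-library Python:

```python
from pathlib import Path

MAX_LINE_LENGTH = 90  # Mirrors the line-length override in docs/pyproject.toml.

# Report every line in the docs examples that would still need a slider.
for path in sorted(Path('docs').rglob('*.py')):
    lines = path.read_text(encoding='utf-8').splitlines()
    for lineno, line in enumerate(lines, start=1):
        if len(line) > MAX_LINE_LENGTH:
            print(f'{path}:{lineno}: {len(line)} characters')
```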
