Skip to content

Commit

Permalink
Merge pull request #213 from Gallaecio/top-rules-for-items
Browse files Browse the repository at this point in the history
Implement RulesRegistry.top_rules_for_item
  • Loading branch information
kmike authored Jan 29, 2025
2 parents 3fafcc9 + cff68ac commit ece2bf9
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 2 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
install_requires=[
"attrs >= 21.3.0",
"parsel >= 1.5.0",
"url-matcher >= 0.2.0",
"url-matcher >= 0.4.0",
"multidict >= 0.5.0",
"w3lib >= 1.22.0",
"async-lru >= 1.0.3",
Expand Down
30 changes: 30 additions & 0 deletions tests/test_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,36 @@ class FakeItem:
assert method(cls("https://example.org"), FakeItem) is None


def test_top_rules_for_item() -> None:
    """Check which rules ``RulesRegistry.top_rules_for_item`` yields.

    Covers three situations: an empty registry, two rules for the same URL
    pattern with different priorities, and two rules for the same URL pattern
    registered without an explicit priority.
    """
    registry = RulesRegistry()

    def top_page_classes(url: str) -> set:
        # Gather the ``use`` attribute of every rule yielded for ``Product``.
        return {rule.use for rule in registry.top_rules_for_item(url, Product)}

    # Nothing has been registered yet, so no rule can be yielded.
    assert list(registry.top_rules_for_item("https://example.com", Product)) == []

    @registry.handle_urls("https://a.example", priority=1000)
    class A1(ProductPage):
        pass

    @registry.handle_urls("https://a.example", priority=900)
    class A2(ProductPage):
        pass

    # A2 was registered with a lower priority than A1, so only A1 is expected.
    assert top_page_classes("https://a.example") == {A1}

    @registry.handle_urls("https://b.example")
    class B1(ProductPage):
        pass

    @registry.handle_urls("https://b.example")
    class B2(ProductPage):
        pass

    # Neither rule sets a priority explicitly; both are expected to be yielded.
    assert top_page_classes("https://b.example") == {B1, B2}


def test_from_override_rules_deprecation_using_ApplyRule() -> None:
rules = [
ApplyRule(
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ deps =
aiohttp==3.7.0
attrs==21.3.0
parsel==1.5.0
url-matcher==0.2.0
url-matcher==0.4.0
tldextract==3.0.0
multidict==5.0.0
w3lib==1.22.0
Expand Down
39 changes: 39 additions & 0 deletions web_poet/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
Any,
DefaultDict,
Dict,
Generator,
Iterable,
List,
Mapping,
Expand Down Expand Up @@ -391,6 +392,44 @@ def page_cls_for_item(
matcher = self._item_matchers.get(item_cls)
return self._match_url_for_page_object(url, matcher)

def top_rules_for_item(
    self, url: Union[_Url, str], item_cls: Type
) -> Generator[ApplyRule, None, None]:
    """Yield the top-priority rules that match *url* and *item_cls*.

    Rules are visited in the order in which the URL matcher stored for
    *item_cls* reports them. The first matching rule sets the reference
    priority; every following rule is yielded as well until one with a lower
    priority is found, at which point iteration stops. Hence, when several
    rules are tied at the top, all of them are yielded, so callers can apply
    their own logic to choose among them. For example:

    .. code-block:: python

        from web_poet import default_registry


        def browser_page_cls_for_item(url, item_cls):
            fallback = None
            for rule in default_registry.top_rules_for_item(url, item_cls):
                if rule.meta.get("browser", False):
                    return rule.use
                if not fallback:
                    fallback = rule.use
            if not fallback:
                raise ValueError(f"No rule found for URL {url!r} and item class {item_cls}")
            return fallback

    Nothing is yielded when *url* or *item_cls* is falsy, or when no matcher
    is stored for *item_cls*.
    """
    if not (url and item_cls):
        return

    item_matcher = self._item_matchers.get(item_cls)
    if not item_matcher:
        return

    top_priority = None
    # NOTE(review): stopping at the first lower priority assumes match_all()
    # reports rule ids from highest to lowest priority -- confirm against
    # url-matcher.
    for matched_id in item_matcher.match_all(url):
        candidate = self._rules[matched_id]
        priority = candidate.for_patterns.priority
        if top_priority is None:
            # The first match defines what "top" means for this lookup.
            top_priority = priority
        elif priority < top_priority:
            # Lower-priority rules are not part of the top group.
            return
        yield candidate


def _walk_module(module: str) -> Iterable:
"""Return all modules from a module recursively.
Expand Down

0 comments on commit ece2bf9

Please sign in to comment.