Skip to content

Commit

Permalink
noop: minor Web.fetch refactoring for validating input url
Browse files Browse the repository at this point in the history
  • Loading branch information
snarfed committed Mar 7, 2025
1 parent b9d7ce8 commit 1464286
Showing 1 changed file with 2 additions and 8 deletions.
10 changes: 2 additions & 8 deletions web.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,21 +542,15 @@ def fetch(cls, obj, gateway=False, check_backlink=False,
"""
 url = obj.key.id()

-if not util.is_web(url):
-    logger.info(f'{url} is not a URL')
-    return False
-
-try:
-    parsed = urlparse(url)
-except ValueError as e:
+if not util.is_web(url) or not util.is_url(url):
     logger.info(f'{url} is not a URL')
     return False

 if (cls.is_blocklisted(url, allow_internal=True)
         or util.domain_or_parent_in(domain_from_link(url), FETCH_BLOCKLIST)):
     return False

-is_homepage = parsed.path.strip('/') == ''
+is_homepage = urlparse(url).path.strip('/') == ''
 if is_homepage:
     domain = domain_from_link(url)
     if domain == PRIMARY_DOMAIN or domain in PROTOCOL_DOMAINS:
Expand Down

0 comments on commit 1464286

Please sign in to comment.