
Commit 1196b54

rssbridge robustness fixes
Signed-off-by: Clemens Vasters <[email protected]>
1 parent bb2ecf3 commit 1196b54

File tree

1 file changed: +49 -31 lines changed


rss/rssbridge/rssbridge.py

+49 -31
@@ -54,10 +54,13 @@ def load_state():
     Returns:
         dict: The loaded state.
     """
-    if os.path.exists(STATE_FILE):
-        with open(STATE_FILE, "r", encoding="utf-8") as f:
-            logging.info("Loading state from %s", STATE_FILE)
-            return json.load(f)
+    try:
+        if os.path.exists(STATE_FILE):
+            with open(STATE_FILE, "r", encoding="utf-8") as f:
+                logging.info("Loading state from %s", STATE_FILE)
+                return json.load(f)
+    except Exception as e:
+        logging.error("Failed to load state: %s", e)
     return {}

@@ -68,9 +71,15 @@ def save_state(state):
     Args:
         state (dict): The state to save.
     """
-    with open(STATE_FILE, "w", encoding="utf-8") as f:
-        logging.info("Saving state to %s", STATE_FILE)
-        json.dump(state, f)
+    try:
+        if not os.path.exists(os.path.dirname(STATE_FILE)):
+            os.makedirs(os.path.dirname(STATE_FILE))
+        with open(STATE_FILE, "w", encoding="utf-8") as f:
+            logging.info("Saving state to %s", STATE_FILE)
+            json.dump(state, f)
+    except Exception as e:
+        logging.error("Failed to save state: %s", e)
+


 def load_feedstore() -> List[str]:
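
Taken together, the two hunks above make state persistence tolerant of a missing directory, a corrupt file, or other I/O errors: failures are logged and the bridge falls back to an empty state instead of crashing. A minimal, self-contained sketch of that pattern follows; the STATE_FILE path here is a placeholder, not the project's configuration.

import json
import logging
import os

STATE_FILE = "/tmp/rssbridge-demo/state.json"  # placeholder path, not the project's setting

def load_state() -> dict:
    """Return the persisted state, or {} if the file is missing or unreadable."""
    try:
        if os.path.exists(STATE_FILE):
            with open(STATE_FILE, "r", encoding="utf-8") as f:
                logging.info("Loading state from %s", STATE_FILE)
                return json.load(f)
    except Exception as e:  # e.g. corrupt JSON or permission errors
        logging.error("Failed to load state: %s", e)
    return {}

def save_state(state: dict) -> None:
    """Persist the state, creating the parent directory on first use."""
    try:
        if not os.path.exists(os.path.dirname(STATE_FILE)):
            os.makedirs(os.path.dirname(STATE_FILE))
        with open(STATE_FILE, "w", encoding="utf-8") as f:
            logging.info("Saving state to %s", STATE_FILE)
            json.dump(state, f)
    except Exception as e:
        logging.error("Failed to save state: %s", e)

if __name__ == "__main__":
    save_state({"https://example.com/feed.xml": {"etag": "abc123"}})
    print(load_state())  # {'https://example.com/feed.xml': {'etag': 'abc123'}}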
@@ -165,18 +174,21 @@ def add_feed(url: str):

     content_type = response.headers.get('Content-Type', '').lower()

-    if 'text/html' in content_type:
-        extracted_urls = extract_feed_urls_from_webpage(url)
-        if not extracted_urls:
-            logging.debug(f"No feeds found at {url}")
+    try:
+        if 'text/html' in content_type:
+            extracted_urls = extract_feed_urls_from_webpage(url)
+            if not extracted_urls:
+                logging.debug(f"No feeds found at {url}")
+            else:
+                feed_urls.extend(extracted_urls)
+                logging.debug(f"Added feed(s) from {url}: {extracted_urls}")
+        elif 'application/rss+xml' in content_type or 'application/atom+xml' in content_type or 'application/xml' in content_type or 'text/xml' in content_type:
+            feed_urls.append(url)
+            logging.debug(f"Added feed {url}")
         else:
-            feed_urls.extend(extracted_urls)
-            logging.debug(f"Added feed(s) from {url}: {extracted_urls}")
-    elif 'application/rss+xml' in content_type or 'application/atom+xml' in content_type or 'application/xml' in content_type or 'text/xml' in content_type:
-        feed_urls.append(url)
-        logging.debug(f"Added feed {url}")
-    else:
-        logging.debug(f"Unsupported content type {content_type} at {url}")
+            logging.debug(f"Unsupported content type {content_type} at {url}")
+    except Exception as e:
+        logging.error("Error processing %s: %s", url, e)

     save_feedstore(list(set(feed_urls)))
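
For context on the text/html branch: add_feed scans HTML pages for feed links and accepts feed content types directly. The commit does not show extract_feed_urls_from_webpage, so the helper below is only a rough, standard-library sketch of RSS/Atom autodiscovery under that assumption, not the repository's implementation.

from html.parser import HTMLParser
from typing import List
from urllib.parse import urljoin

FEED_TYPES = {"application/rss+xml", "application/atom+xml"}

class FeedLinkParser(HTMLParser):
    """Collects hrefs from <link rel="alternate" type="application/rss+xml|atom+xml"> tags."""

    def __init__(self, base_url: str):
        super().__init__()
        self.base_url = base_url
        self.found: List[str] = []

    def handle_starttag(self, tag, attrs):
        if tag != "link":
            return
        attr = dict(attrs)
        rel = (attr.get("rel") or "").lower()
        link_type = (attr.get("type") or "").lower()
        if rel == "alternate" and link_type in FEED_TYPES and attr.get("href"):
            self.found.append(urljoin(self.base_url, attr["href"]))

def discover_feed_urls(page_url: str, html: str) -> List[str]:
    """Return absolute feed URLs advertised by an HTML page."""
    parser = FeedLinkParser(page_url)
    parser.feed(html)
    return parser.found

page = '<html><head><link rel="alternate" type="application/rss+xml" href="/feed.xml"></head></html>'
print(discover_feed_urls("https://example.com/blog", page))  # ['https://example.com/feed.xml']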

@@ -344,21 +356,25 @@ def fetch_feed(url: str, etag: Optional[str] = None) -> requests.Response:
         requests.Response: The HTTP response object.
     """

-    headers = {
-        'User-Agent': USER_AGENT,
-        'Accept': 'application/atom+xml, application/rss+xml, application/xml, text/xml',
-        'Accept-Encoding': 'gzip, deflate, br, zstd',
-        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,de;q=0.7,de-DE;q=0.6,ko;q=0.5',
-    }
-    if etag:
-        headers['If-None-Match'] = etag
+    try:
+        headers = {
+            'User-Agent': USER_AGENT,
+            'Accept': 'application/atom+xml, application/rss+xml, application/xml, text/xml',
+            'Accept-Encoding': 'gzip, deflate, br, zstd',
+            'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,de;q=0.7,de-DE;q=0.6,ko;q=0.5',
+        }
+        if etag:
+            headers['If-None-Match'] = etag

-    response = requests.get(url, headers=headers, timeout=10)
-    logging.info("%s: Response status code: %s", url, response.status_code)
-    if response.status_code == 304:
+        response = requests.get(url, headers=headers, timeout=10)
+        logging.info("%s: Response status code: %s", url, response.status_code)
+        if response.status_code == 304:
+            return response
+        response.raise_for_status()
         return response
-    response.raise_for_status()
-    return response
+    except requests.RequestException as e:
+        logging.error("Failed to fetch %s: %s", url, e)
+        raise e


 async def process_feed(feed_url: str, state: dict, producer_instance: MicrosoftOpenDataRssFeedsEventProducer):
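
The fetch_feed hunk wraps a conditional GET in try/except and re-raises request failures after logging them. Below is a stripped-down sketch of that conditional-GET pattern, with a placeholder User-Agent and URL rather than the module's constants.

import logging
from typing import Optional

import requests

def conditional_get(url: str, etag: Optional[str] = None) -> requests.Response:
    """Fetch url, sending If-None-Match when an ETag is known; 304 means 'not modified'."""
    headers = {"User-Agent": "feed-fetch-demo/0.1"}  # placeholder, not the module's USER_AGENT
    if etag:
        headers["If-None-Match"] = etag
    try:
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 304:
            return response  # unchanged since the cached ETag; caller keeps its copy
        response.raise_for_status()  # turn 4xx/5xx into exceptions
        return response
    except requests.RequestException as e:
        logging.error("Failed to fetch %s: %s", url, e)
        raise

if __name__ == "__main__":
    first = conditional_get("https://example.com/feed.xml")
    cached = conditional_get("https://example.com/feed.xml", etag=first.headers.get("ETag"))

Returning the 304 response unchanged lets the caller keep its cached entries and reuse the stored ETag on the next poll.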
@@ -467,6 +483,8 @@ async def process_feed(feed_url: str, state: dict, producer_instance: MicrosoftO
                 state[feed_url] = {}
                 state[feed_url]["skip"] = True
                 logging.debug(f"Skipping {feed_url} due to 404/403 response")
+            else:
+                logging.debug(f"Error processing feed {feed_url}: {e}")
         else:
             logging.debug(f"Error processing feed {feed_url}: {e}")
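
The added else branch ensures that errors other than 404/403 are at least logged rather than silently dropped. Purely for illustration, the hypothetical should_skip helper below (not part of the commit) shows how the per-feed skip flag set above might be consulted on the next run.

def should_skip(state: dict, feed_url: str) -> bool:
    """True if an earlier run flagged this feed as unavailable (404/403)."""
    return bool(state.get(feed_url, {}).get("skip"))

state = {"https://gone.example/feed.xml": {"skip": True}}
print(should_skip(state, "https://gone.example/feed.xml"))  # True
print(should_skip(state, "https://ok.example/feed.xml"))    # False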
