def load_state():
    """Load persisted application state from STATE_FILE.

    Returns:
        dict: The loaded state, or an empty dict when the file is
        missing, unreadable, or does not contain valid JSON.
    """
    try:
        # EAFP: open directly and let FileNotFoundError signal absence.
        # The previous os.path.exists() check raced against concurrent
        # deletion between the check and the open.
        with open(STATE_FILE, "r", encoding="utf-8") as f:
            logging.info("Loading state from %s", STATE_FILE)
            return json.load(f)
    except FileNotFoundError:
        # Missing state file is the normal first-run case, not an error.
        pass
    except (OSError, json.JSONDecodeError) as e:
        # Narrowed from `except Exception`: only I/O and parse failures
        # are expected here; anything else is a bug that should surface.
        logging.error("Failed to load state: %s", e)
    return {}
def save_state(state):
    """Persist *state* as JSON to STATE_FILE, creating its directory.

    Args:
        state (dict): The state to save.
    """
    try:
        state_dir = os.path.dirname(STATE_FILE)
        # dirname() is "" when STATE_FILE is a bare filename; makedirs("")
        # would raise, so only create a directory when one is named.
        # exist_ok=True removes the exists()/makedirs() race the previous
        # two-step check had.
        if state_dir:
            os.makedirs(state_dir, exist_ok=True)
        with open(STATE_FILE, "w", encoding="utf-8") as f:
            logging.info("Saving state to %s", STATE_FILE)
            json.dump(state, f)
    except (OSError, TypeError, ValueError) as e:
        # Narrowed from `except Exception`: OSError covers file/dir
        # failures; TypeError/ValueError cover non-serializable or
        # circular state values from json.dump.
        logging.error("Failed to save state: %s", e)
def load_feedstore () -> List [str ]:
@@ -165,18 +174,21 @@ def add_feed(url: str):
165
174
166
175
content_type = response .headers .get ('Content-Type' , '' ).lower ()
167
176
168
- if 'text/html' in content_type :
169
- extracted_urls = extract_feed_urls_from_webpage (url )
170
- if not extracted_urls :
171
- logging .debug (f"No feeds found at { url } " )
177
+ try :
178
+ if 'text/html' in content_type :
179
+ extracted_urls = extract_feed_urls_from_webpage (url )
180
+ if not extracted_urls :
181
+ logging .debug (f"No feeds found at { url } " )
182
+ else :
183
+ feed_urls .extend (extracted_urls )
184
+ logging .debug (f"Added feed(s) from { url } : { extracted_urls } " )
185
+ elif 'application/rss+xml' in content_type or 'application/atom+xml' in content_type or 'application/xml' in content_type or 'text/xml' in content_type :
186
+ feed_urls .append (url )
187
+ logging .debug (f"Added feed { url } " )
172
188
else :
173
- feed_urls .extend (extracted_urls )
174
- logging .debug (f"Added feed(s) from { url } : { extracted_urls } " )
175
- elif 'application/rss+xml' in content_type or 'application/atom+xml' in content_type or 'application/xml' in content_type or 'text/xml' in content_type :
176
- feed_urls .append (url )
177
- logging .debug (f"Added feed { url } " )
178
- else :
179
- logging .debug (f"Unsupported content type { content_type } at { url } " )
189
+ logging .debug (f"Unsupported content type { content_type } at { url } " )
190
+ except Exception as e :
191
+ logging .error ("Error processing %s: %s" , url , e )
180
192
181
193
save_feedstore (list (set (feed_urls )))
182
194
def fetch_feed(url: str, etag: Optional[str] = None) -> requests.Response:
    """Fetch a feed URL with a conditional GET.

    Args:
        url (str): The feed URL to fetch.
        etag (Optional[str]): ETag from a previous fetch; sent as
            If-None-Match so the server can reply 304 Not Modified.

    Returns:
        requests.Response: The HTTP response object (may be a 304).

    Raises:
        requests.RequestException: On network failure or a non-304
            HTTP error status.
    """
    headers = {
        'User-Agent': USER_AGENT,
        'Accept': 'application/atom+xml, application/rss+xml, application/xml, text/xml',
        'Accept-Encoding': 'gzip, deflate, br, zstd',
        'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8,de;q=0.7,de-DE;q=0.6,ko;q=0.5',
    }
    if etag:
        headers['If-None-Match'] = etag

    try:
        # Keep the try minimal: only the network call and status check
        # can raise; building the headers dict above cannot.
        response = requests.get(url, headers=headers, timeout=10)
        logging.info("%s: Response status code: %s", url, response.status_code)
        if response.status_code == 304:
            # 304 is the expected "not modified" answer, not an error.
            return response
        response.raise_for_status()
        return response
    except requests.RequestException as e:
        logging.error("Failed to fetch %s: %s", url, e)
        raise  # bare raise preserves the original traceback (was `raise e`)
async def process_feed (feed_url : str , state : dict , producer_instance : MicrosoftOpenDataRssFeedsEventProducer ):
@@ -467,6 +483,8 @@ async def process_feed(feed_url: str, state: dict, producer_instance: MicrosoftO
467
483
state [feed_url ] = {}
468
484
state [feed_url ]["skip" ] = True
469
485
logging .debug (f"Skipping { feed_url } due to 404/403 response" )
486
+ else :
487
+ logging .debug (f"Error processing feed { feed_url } : { e } " )
470
488
else :
471
489
logging .debug (f"Error processing feed { feed_url } : { e } " )
472
490
0 commit comments