@@ -36,8 +36,10 @@ def __init__(self, crawler, splash_base_url, slot_policy):
36
36
37
37
@classmethod
38
38
def from_crawler (cls , crawler ):
39
- splash_base_url = crawler .settings .get ('SPLASH_URL' , cls .default_splash_url )
40
- slot_policy = crawler .settings .get ('SPLASH_SLOT_POLICY' , cls .default_policy )
39
+ splash_base_url = crawler .settings .get ('SPLASH_URL' ,
40
+ cls .default_splash_url )
41
+ slot_policy = crawler .settings .get ('SPLASH_SLOT_POLICY' ,
42
+ cls .default_policy )
41
43
42
44
if slot_policy not in SlotPolicy ._known :
43
45
raise NotConfigured ("Incorrect slot policy: %r" % slot_policy )
@@ -49,17 +51,20 @@ def process_request(self, request, spider):
49
51
if not splash_options :
50
52
return
51
53
54
+ if request .meta .get ("_splash_processed" ):
55
+ # don't process the same request more than once
56
+ return
57
+
52
58
if request .method != 'GET' :
53
59
logger .warn (
54
- "Currently only GET requests are supported by SplashMiddleware; "
55
- "%(request)s will be handled without Splash" ,
60
+ "Currently only GET requests are supported by SplashMiddleware;"
61
+ " %(request)s will be handled without Splash" ,
56
62
{'request' : request },
57
63
extra = {'spider' : spider }
58
64
)
59
65
return request
60
66
61
67
meta = request .meta
62
- del meta ['splash' ]
63
68
meta ['_splash_processed' ] = splash_options
64
69
65
70
slot_policy = splash_options .get ('slot_policy' , self .slot_policy )
@@ -83,7 +88,9 @@ def process_request(self, request, spider):
83
88
# But we can change Scrapy `download_timeout`: increase
84
89
# it when it's too small. Decreasing `download_timeout` is not
85
90
# safe.
86
- timeout_current = meta .get ('download_timeout' , 1e6 ) # no timeout means infinite timeout
91
+
92
+ # no timeout means infinite timeout
93
+ timeout_current = meta .get ('download_timeout' , 1e6 )
87
94
timeout_expected = float (args ['timeout' ]) + self .splash_extra_timeout
88
95
89
96
if timeout_expected > timeout_current :
@@ -131,4 +138,6 @@ def _set_download_slot(self, request, meta, slot_policy):
131
138
pass
132
139
133
140
def _get_slot_key (self , request_or_response ):
134
- return self .crawler .engine .downloader ._get_slot_key (request_or_response , None )
141
+ return self .crawler .engine .downloader ._get_slot_key (
142
+ request_or_response , None
143
+ )
0 commit comments