Skip to content

Commit 1853125

Browse files
committed
Added: Correctly handling unsupported content (see: http://doc.qt.io/qt-5.5/qwebpage.html#unsupportedContent). Fixes #91
1 parent 2b1a390 commit 1853125

File tree

3 files changed

+86
-2
lines changed

3 files changed

+86
-2
lines changed

splash/browser_tab.py

+52-2
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from splash.qtutils import (OPERATION_QT_CONSTANTS, WrappedSignal, qt2py,
2121
qurl2ascii, to_qurl)
2222
from splash.render_options import validate_size_str
23-
from splash.qwebpage import SplashQWebPage, SplashQWebView
23+
from splash.qwebpage import SplashQWebPage, SplashQWebView, RenderErrorInfo
2424
from splash.exceptions import JsError, OneShotCallbackError
2525
from splash.utils import to_bytes, escape_js
2626

@@ -64,6 +64,9 @@ def __init__(self, network_manager, splash_proxy_factory, verbosity,
6464
self._callback_proxies_to_cancel = weakref.WeakSet()
6565
self._js_console = None
6666
self._autoload_scripts = []
67+
self._is_unsupported_content = False
68+
self._unsupported_content_reply = None
69+
self._load_finished_after_unsupported_content_ready = False
6770

6871
self.logger = _BrowserTabLogger(uid=self._uid, verbosity=verbosity)
6972
self._init_webpage(verbosity, network_manager, splash_proxy_factory,
@@ -133,6 +136,8 @@ def _setup_webpage_events(self):
133136
self.web_page.mainFrame().loadFinished.connect(self._on_load_finished)
134137
self.web_page.mainFrame().urlChanged.connect(self._on_url_changed)
135138
self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._on_javascript_window_object_cleared)
139+
self.web_page.setForwardUnsupportedContent(True)
140+
self.web_page.unsupportedContent.connect(self._on_unsupported_content)
136141
self.logger.add_web_page(self.web_page)
137142

138143
def return_result(self, result):
@@ -372,6 +377,15 @@ def _on_load_finished(self, ok):
372377
This callback is called for all web_page.mainFrame()
373378
loadFinished events.
374379
"""
380+
if self._is_unsupported_content:
381+
if self._unsupported_content_reply.isRunning():
382+
# XXX: We'll come back later when download finishes
383+
self.logger.log(
384+
'Still receving unsupported content', min_level=3)
385+
return
386+
else:
387+
self._load_finished_after_unsupported_content_ready = True
388+
self.logger.log('Unsupported content received', min_level=3)
375389
if self.web_page.maybe_redirect(ok):
376390
self.logger.log("Redirect or other non-fatal error detected", min_level=2)
377391
return
@@ -419,7 +433,11 @@ def _on_content_ready(self, ok, callback, errback, callback_id):
419433
"""
420434
This method is called when a QWebPage finishes loading its contents.
421435
"""
422-
if self.web_page.maybe_redirect(ok):
436+
if self._is_unsupported_content:
437+
if self._unsupported_content_reply.isRunning():
438+
# XXX: We'll come back later when download finishes
439+
return
440+
elif self.web_page.maybe_redirect(ok):
423441
# XXX: It assumes loadFinished will be called again because
424442
# redirect happens. If redirect is detected improperly,
425443
# loadFinished won't be called again, and Splash will return
@@ -431,6 +449,16 @@ def _on_content_ready(self, ok, callback, errback, callback_id):
431449

432450
if self.web_page.is_ok(ok):
433451
callback()
452+
elif self._is_unsupported_content:
453+
# XXX: Error downloading unsupported content.
454+
# `self.web_page.error_info` shall be `None` now
455+
error_info = RenderErrorInfo(
456+
'Network',
457+
int(self._unsupported_content_reply.error()),
458+
six.text_type(self._unsupported_content_reply.errorString()),
459+
six.text_type(self._unsupported_content_reply.url().url())
460+
)
461+
errback(error_info)
434462
elif self.web_page.error_loading(ok):
435463
# XXX: maybe return a meaningful error page instead of generic
436464
# error message?
@@ -505,6 +533,28 @@ def _on_url_changed(self, url):
505533
self.web_page.har.store_redirect(six.text_type(url.toString()))
506534
self._cancel_timers(self._timers_to_cancel_on_redirect)
507535

536+
def _on_unsupported_content_finished(self):
    """
    Slot called once the reply carrying unsupported content is done.

    Unless ``_on_load_finished`` has already run after the download
    completed (``_load_finished_after_unsupported_content_ready`` is set),
    emit the main frame's ``loadFinished`` signal here. The emitted flag
    is ``True`` only when the reply reports no error.
    """
    self.logger.log('Unsupported content finished', min_level=3)
    if self._load_finished_after_unsupported_content_ready:
        # XXX: loadFinished was already handled after the download
        # completed; we do not want the same signal twice.
        return
    download_ok = not self._unsupported_content_reply.error()
    self.web_page.mainFrame().loadFinished.emit(download_ok)
546+
547+
def _on_unsupported_content(self, reply):
    """
    Slot connected to the web page's ``unsupportedContent`` signal.

    Stores ``reply`` and marks the tab as handling unsupported content,
    then arranges for ``_on_unsupported_content_finished`` to run: right
    away when the reply is already finished, otherwise when the reply
    emits ``finished``.
    """
    self.logger.log('Unsupported content detected', min_level=3)
    self._unsupported_content_reply = reply
    self._is_unsupported_content = True
    if not reply.isFinished():
        # Still downloading: wait for the reply to report completion.
        reply.finished.connect(self._on_unsupported_content_finished)
        return
    # Already finished. The content might be very short.
    self.logger.log('Unsupported content already finished', min_level=3)
    self._on_unsupported_content_finished()
557+
508558
def run_js_file(self, filename, handle_errors=True):
509559
"""
510560
Load JS library from file ``filename`` to the current frame.

splash/tests/mockserver.py

+19
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,23 @@ def render_GET(self, request):
743743
return b"ok"
744744

745745

746+
class RawBytes(Resource):
    """
    Resource that writes a hand-built HTTP response directly to the
    connection's transport and then drops the connection.

    Query arguments:

    * ``length`` - number of ``0`` bytes sent as the body (default 1024);
    * ``claim_length`` - value announced in the ``Content-Length`` header
      (defaults to the real body length), so a response can claim more
      data than it actually delivers.
    """

    def render_GET(self, request):
        args = request.args
        body_length = int(args.get(b'length', [1024])[0])
        claim_length = int(args.get(b'claim_length', [body_length])[0])
        length_header = ('Content-Length: %d' % claim_length).encode('utf8')
        response_lines = [
            b'HTTP/1.1 200 OK',
            length_header,
            b'',
            b'0' * body_length,
        ]
        transport = request.channel.transport
        transport.write(b'\n'.join(response_lines))
        transport.loseConnection()
        # Already done: the whole response was written to the transport
        # by hand above.
        return NOT_DONE_YET
761+
762+
746763
class Index(Resource):
747764
isLeaf = True
748765

@@ -820,6 +837,8 @@ def __init__(self, http_port, https_port, proxy_port):
820837
self.putChild(b"bad-content-type", InvalidContentTypeResource())
821838
self.putChild(b"bad-content-type2", InvalidContentTypeResource2())
822839

840+
self.putChild(b"raw-bytes", RawBytes())
841+
823842
self.putChild(b"jsredirect", JsRedirect())
824843
self.putChild(b"jsredirect-to", JsRedirectTo())
825844
self.putChild(b"jsredirect-slowimage", JsRedirectSlowImage())

splash/tests/test_render.py

+15
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,21 @@ def test_invalid_wait(self):
208208
'wait': wait})
209209
self.assertStatusCode(r, 400)
210210

211+
def test_unsupported_content(self):
    """
    Request each ``raw-bytes`` URL below and check the status code:
    200 when only ``length`` is given, 502 when ``claim_length`` is
    larger than ``length``.
    """
    expectations = (
        # Short body (Can be received together with the headers)
        ("raw-bytes?length=16", 200),
        # Short body with error
        ("raw-bytes?length=16&claim_length=100", 502),
        # Long body (Can't be received together with the headers)
        ("raw-bytes?length=10000", 200),
        # Long body with error
        ("raw-bytes?length=10000&claim_length=20000", 502),
    )
    for path, expected_status in expectations:
        response = self.request({"url": self.mockurl(path)})
        self.assertStatusCode(response, expected_status)
225+
211226
@pytest.mark.skipif(
212227
not qt_551_plus(),
213228
reason="resource_timeout doesn't work in Qt5 < 5.5.1. See issue #269 for details."

0 commit comments

Comments
 (0)