Skip to content

Commit

Permalink
[nhentai] fix extraction (closes #156)
Browse files: browse the repository at this point in the history
Use JSON embedded in webpage since API endpoints have been disabled
  • Loading branch information
mikf committed Jan 14, 2019
1 parent 5f38ac9 commit 751e535
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 23 deletions.
41 changes: 19 additions & 22 deletions gallery_dl/extractor/nhentai.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -10,6 +10,7 @@

from .common import Extractor, Message
from .. import text
import json


class NHentaiExtractor(Extractor):
Expand Down Expand Up @@ -68,45 +69,41 @@ def items(self):

def get_gallery_info(self, gallery_id):
"""Extract and return info about a gallery by ID"""
url = "{}/api/gallery/{}".format(self.root, gallery_id)
return self.request(url).json()
url = "{}/g/{}".format(self.root, gallery_id)
page = self.request(url).text
return json.loads(text.extract(page, "N.gallery(", ");")[0])


class NhentaiSearchExtractor(NHentaiExtractor):
"""Extractor for nhentai search results"""
category = "nhentai"
subcategory = "search"
pattern = [r"(?:https?://)?nhentai\.net/search/?\?(.*)"]
pattern = [r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"]
test = [("https://nhentai.net/search/?q=touhou", {
"pattern": NhentaiGalleryExtractor.pattern[0],
"count": 30,
"range": "1-30",
})]

def __init__(self, match):
NHentaiExtractor.__init__(self)
self.params = text.parse_query(match.group(1))

if "q" in self.params:
self.params["query"] = self.params["q"]
del self.params["q"]

def items(self):
yield Message.Version, 1
for ginfo in self._pagination("galleries/search", self.params):
url = "{}/g/{}/".format(self.root, ginfo["id"])
yield Message.Queue, url, self.transform_to_metadata(ginfo)
for gid in self._pagination(self.params):
url = "{}/g/{}/".format(self.root, gid)
yield Message.Queue, url, {}

def _pagination(self, endpoint, params):
"""Pagination over API responses"""
url = "{}/api/{}".format(self.root, endpoint)
def _pagination(self, params):
url = "{}/search/".format(self.root)
params["page"] = text.parse_int(params.get("page"), 1)

while True:
data = self.request(
url, params=params, expect=range(400, 500)).json()

if "error" in data:
self.log.error("API request failed: \"%s\"", data["error"])
return
page = self.request(url, params=params).text

yield from data["result"]
yield from text.extract_iter(page, 'href="/g/', '/')

if params["page"] >= data["num_pages"]:
if 'class="next"' not in page:
return
params["page"] += 1
2 changes: 1 addition & 1 deletion gallery_dl/extractor/smugmug.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class SmugmugImageExtractor(SmugmugExtractor):
# no "ImageOwner"
("https://www.smugmug.com/gallery/n-GLCjnD/i-JD62fQk", {
"url": "d4047637947b35e4ef49e3c7cb70303cc224a3a0",
"keyword": "96fc43bc3081f6356c929be43ab5971009975063",
"keyword": "0a1b12efd789c42d9b061f01b2a1fcfd6af32003",
}),
]

Expand Down
1 change: 1 addition & 0 deletions test/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

# temporary issues, etc.
BROKEN = {
"hbrowse",
"pinterest",
}

Expand Down

0 comments on commit 751e535

Please sign in to comment.