Skip to content

Commit

Permalink
implement 'downloader' options per extractor category
Browse files Browse the repository at this point in the history
by setting options inside 'http' or 'ytdl' inside extractor options
or inside subcategory options

{
    "extractor": {
        "mastodon": {
            "http": {
                "rate": "10k"
            }
        },
        "mastodon.social": {
            "http": {
                "rate": "100k"
            }
        }
    },
    "downloader": {
        "rate": "100m"
    }
}

Sets download speed to
- 100k for mastodon.social URLs
-  10k for mastodon sites in general
- 100m for all other sites
  • Loading branch information
mikf committed Feb 22, 2025
1 parent 4906541 commit 18ed39c
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 4 deletions.
51 changes: 48 additions & 3 deletions gallery_dl/downloader/common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-

# Copyright 2014-2022 Mike Fährmann
# Copyright 2014-2025 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
Expand All @@ -17,8 +17,15 @@ class DownloaderBase():
scheme = ""

def __init__(self, job):
extractor = job.extractor

opts = self._extractor_config(extractor)
if opts:
self.opts = opts
self.config = self.config_opts

self.out = job.out
self.session = job.extractor.session
self.session = extractor.session
self.part = self.config("part", True)
self.partdir = self.config("part-directory")
self.log = job.get_logger("downloader." + self.scheme)
Expand All @@ -29,13 +36,51 @@ def __init__(self, job):

proxies = self.config("proxy", util.SENTINEL)
if proxies is util.SENTINEL:
self.proxies = job.extractor._proxies
self.proxies = extractor._proxies
else:
self.proxies = util.build_proxy_map(proxies, self.log)

def config(self, key, default=None):
    """Interpolate downloader config value for 'key'

    The lookup is delegated to config.interpolate() using the path
    ("downloader", <self.scheme>); 'default' is handed through to it
    as the fallback value.
    """
    cfgpath = ("downloader", self.scheme)
    return config.interpolate(cfgpath, key, default)

def config_opts(self, key, default=None):
    """Return the value for 'key', preferring extractor-specific options

    Entries in self.opts win over the regular downloader config.
    util.SENTINEL marks a missing key, so an explicitly stored None
    (or any other falsy value) is still returned as-is.
    """
    override = self.opts.get(key, util.SENTINEL)
    if override is util.SENTINEL:
        # not overridden for this extractor: use the general lookup
        return config.interpolate(
            ("downloader", self.scheme), key, default)
    return override

def _extractor_config(self, extractor):
    """Collect the downloader options configured for 'extractor'

    'extractor._cfgpath' is handled in two forms:
    - a list of (category, subcategory) pairs, whose options are merged
      into one new dict (possibly empty)
    - any other indexable, whose items 1 and 2 are used as category and
      subcategory; the result of _extractor_opts() is returned unchanged
    """
    cfgpath = extractor._cfgpath

    if isinstance(cfgpath, list):
        merged = {}
        # walk the list back to front, so that options belonging to
        # earlier entries are applied last and take precedence
        for category, subcategory in reversed(cfgpath):
            layer = self._extractor_opts(category, subcategory)
            if layer:
                merged.update(layer)
        return merged

    return self._extractor_opts(cfgpath[1], cfgpath[2])

def _extractor_opts(self, category, subcategory):
    """Return the options of this downloader set for one extractor

    Fetches the config section of 'category' below "extractor" and reads
    the options stored under the key named after self.scheme, both
    directly in that section and in its 'subcategory' subsection.
    When both exist, they are merged into a new dict in which
    subcategory options override category options.

    Returns a dict of options, or a falsy value (None or an empty dict)
    when nothing usable is configured.

    Anything that is not a dict is ignored instead of being accessed
    like one. The category section can hold a plain option (bool, str,
    ...) whose name equals 'subcategory', and calling .get() on such a
    value would raise AttributeError.
    """
    cfg = config.get(("extractor",), category)
    if not cfg or not isinstance(cfg, dict):
        return None

    # options for this scheme on category level
    copts = cfg.get(self.scheme)
    if not isinstance(copts, dict):
        copts = None

    # options for this scheme on subcategory level; only descend into
    # cfg[subcategory] if it really is a nested section
    section = cfg.get(subcategory)
    sopts = section.get(self.scheme) if isinstance(section, dict) else None
    if not isinstance(sopts, dict):
        sopts = None

    if not copts:
        return sopts
    if not sopts:
        return copts

    # merge into a copy to leave the stored config untouched
    opts = copts.copy()
    opts.update(sopts)
    return opts

def download(self, url, pathfmt):
"""Write data from 'url' into the file specified by 'pathfmt'"""
61 changes: 60 additions & 1 deletion test/test_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import threading
import http.server


sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from gallery_dl import downloader, extractor, output, config, path # noqa E402
from gallery_dl.downloader.http import MIME_TYPES, SIGNATURE_CHECKS # noqa E402
Expand Down Expand Up @@ -55,6 +54,9 @@ def tearDownClass(cls):
else:
del sys.modules["youtube_dl"]

def setUp(self):
    # Empty downloader._cache before each test; presumably this keeps
    # entries cached by earlier tests from affecting this one
    # -- TODO confirm
    downloader._cache.clear()

def tearDown(self):
    # Empty downloader._cache again after each test, so nothing stored
    # during the test remains in the module-level cache
    downloader._cache.clear()

Expand Down Expand Up @@ -107,6 +109,63 @@ def test_cache_https(self, import_module):
self.assertEqual(import_module.call_count, 1)


class TestDownloaderConfig(unittest.TestCase):
    """Check which option values an "http" downloader ends up with"""

    def setUp(self):
        config.clear()

    def tearDown(self):
        config.clear()

    def test_default_http(self):
        """Empty config: fixed defaults and values shared with extractor"""
        job = FakeJob()
        extr = job.extractor
        dl = downloader.find("http")(job)

        defaults = (
            ("adjust_extension", True),
            ("chunk_size", 32768),
            ("metadata", None),
            ("progress", 3.0),
            ("validate", True),
            ("headers", None),
            ("minsize", None),
            ("maxsize", None),
            ("mtime", True),
            ("rate", None),
            ("part", True),
            ("partdir", None),
        )
        for attr, expected in defaults:
            self.assertEqual(getattr(dl, attr), expected)

        # these must be the very same objects as the extractor's
        # underscore-prefixed attributes
        shared = (
            "interval_429",
            "retry_codes",
            "retries",
            "timeout",
            "proxies",
            "verify",
        )
        for attr in shared:
            self.assertIs(getattr(dl, attr), getattr(extr, "_" + attr))

    def test_config_http(self):
        """Options set on different config levels are all picked up"""
        settings = (
            ((), "rate", 42),
            ((), "mtime", False),
            ((), "headers", {"foo": "bar"}),
            (("downloader",), "retries", -1),
            (("downloader", "http"), "filesize-min", "10k"),
            (("extractor", "generic"), "verify", False),
            (("extractor", "generic", "example.org"), "timeout", 10),
            (("extractor", "generic", "http"), "rate", "1k"),
            (("extractor", "generic", "example.org", "http"), "headers", {}),
        )
        for path, key, value in settings:
            config.set(path, key, value)

        job = FakeJob()
        dl = downloader.find("http")(job)

        expected = (
            ("headers", {}),
            ("minsize", 10240),
            ("retries", float("inf")),
            ("timeout", 10),
            ("verify", False),
            ("mtime", False),
            ("rate", 1024),
        )
        for attr, value in expected:
            self.assertEqual(getattr(dl, attr), value)


class TestDownloaderBase(unittest.TestCase):

@classmethod
Expand Down

0 comments on commit 18ed39c

Please sign in to comment.