-
-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* Add TikTok photo support #3061 #4177 * Address linting errors * Fix more test failures * Forgot to update category names in tests * Looking into re issue * Follow default yt-dlp output template * Fix format string error on 3.5 * Support downloading videos and audio Respond to comments Improve archiving and file naming * Forgot to update supportedsites.md * Support user profiles * Fix indentation * Prevent matching with more than one TikTok extractor * Fix TikTok regex * Support TikTok profile avatars * Fix supportedsites.md * TikTok: Ignore no formats error In my limited experience, this doesn't mean that gallery-dl can't download the photo post (but this could mean that you can't download the audio) * Fix error reporting message * TikTok: Support more URL formats vt.tiktok.com www.tiktok.com/t/ * TikTok: Only download avatar when extracting user profile * TikTok: Document profile avatar limitation * TikTok: Add support for www.tiktokv.com/share links * Address Share -> Sharepost issue * TikTok: Export post's creation date in JSON (ISO 8601) * [tiktok] update * [tiktok] update 'vmpost' handling just perform a HEAD request and handle its response * [tiktok] build URLs from post IDs instead of reusing unchanged input URLs * [tiktok] combine 'post' and 'sharepost' extractors * [tiktok] update default filenames put 'id' and 'num' first to ensure better file order * [tiktok] improve ytdl usage - speed up extraction by passing '"extract_flat": True' - pass more user options and cookies - pre-define 'TikTokUser' extractor usage * [tiktok] Add _COOKIES entry to AUTH_MAP * [tiktok] Always download user avatars * [tiktok] Add more documentation to supportedsites.md * [tiktok] Address review comments --------- Co-authored-by: Mike Fährmann <[email protected]>
- Loading branch information
1 parent
a9853cd
commit daac2c6
Showing
5 changed files
with
521 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -171,6 +171,7 @@ | |
"tapas", | ||
"tcbscans", | ||
"telegraph", | ||
"tiktok", | ||
"tmohentai", | ||
"toyhouse", | ||
"tsumino", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,242 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
# This program is free software; you can redistribute it and/or modify | ||
# it under the terms of the GNU General Public License version 2 as | ||
# published by the Free Software Foundation. | ||
|
||
"""Extractors for https://www.tiktok.com/""" | ||
|
||
from .common import Extractor, Message | ||
from .. import text, util, ytdl, exception | ||
|
||
BASE_PATTERN = r"(?:https?://)?(?:www\.)?tiktokv?\.com" | ||
|
||
|
||
class TiktokExtractor(Extractor):
    """Base class for TikTok extractors

    items() iterates over the post URLs returned by self.urls(), which
    is not defined in this class and has to be supplied by subclasses.
    Subclasses may also override avatar() to request a profile avatar.
    """
    category = "tiktok"
    directory_fmt = ("{category}", "{user}")
    filename_fmt = (
        "{id}{num:?_//>02} {title[b:150]}{img_id:? [/]/}.{extension}")
    archive_fmt = "{id}_{num}_{img_id}"
    root = "https://www.tiktok.com"
    cookies_domain = ".tiktok.com"

    def avatar(self):
        """Return the name of the user whose avatar should be downloaded

        The empty string returned here makes items() skip the avatar.
        """
        return ""

    def items(self):
        """Yield Directory and Url messages for every URL from urls()

        Photo posts yield one Url message per image. When the 'videos'
        option is enabled (the default), every post additionally yields
        a 'ytdl:' URL. Posts reporting an error status are skipped.
        """
        videos = self.config("videos", True)
        # We assume that all of the URLs served by urls() come from the same
        # author.
        # An empty avatar() result marks the avatar as already handled.
        downloaded_avatar = not self.avatar()

        for tiktok_url in self.urls():
            tiktok_url = self._sanitize_url(tiktok_url)
            data = self._extract_rehydration_data(tiktok_url)
            if "webapp.video-detail" not in data:
                # Only /video/ links result in the video-detail dict we need.
                # Try again using that form of link.
                tiktok_url = self._sanitize_url(
                    data["seo.abtest"]["canonical"])
                data = self._extract_rehydration_data(tiktok_url)
            video_detail = data["webapp.video-detail"]

            if not self._check_status_code(video_detail, tiktok_url):
                continue

            post = video_detail["itemInfo"]["itemStruct"]
            author = post["author"]
            post["user"] = user = author["uniqueId"]
            post["date"] = text.parse_timestamp(post["createTime"])
            # 'original_title' remembers whether the post had a
            # description, so the video branch can pick its own fallback.
            original_title = title = post["desc"]
            if not title:
                title = "TikTok photo #{}".format(post["id"])

            if not downloaded_avatar:
                # Take the avatar from the first usable post's author data.
                avatar_url = author["avatarLarger"]
                avatar = self._generate_avatar(
                    avatar_url, post, user, author["id"])
                yield Message.Directory, avatar
                yield Message.Url, avatar_url, avatar
                downloaded_avatar = True

            yield Message.Directory, post
            if "imagePost" in post:
                img_list = post["imagePost"]["images"]
                for i, img in enumerate(img_list, 1):
                    url = img["imageURL"]["urlList"][0]
                    text.nameext_from_url(url, post)
                    # The same 'post' dict is updated and re-yielded
                    # for every image of the post.
                    post.update({
                        "type"  : "image",
                        "image" : img,
                        "title" : title,
                        "num"   : i,
                        "img_id": post["filename"].partition("~")[0],
                        "width" : img["imageWidth"],
                        "height": img["imageHeight"],
                    })
                    yield Message.Url, url, post

            elif videos:
                if not original_title:
                    title = "TikTok video #{}".format(post["id"])

            else:
                # Not a photo post, and video downloads are disabled.
                self.log.info("%s: Skipping post", tiktok_url)

            # This also runs for photo posts when 'videos' is enabled.
            if videos:
                post.update({
                    "type"      : "video",
                    "image"     : None,
                    "filename"  : "",
                    "extension" : "mp4",
                    "title"     : title,
                    "num"       : 0,
                    "img_id"    : "",
                    "width"     : 0,
                    "height"    : 0,
                })
                yield Message.Url, "ytdl:" + tiktok_url, post

        # If we couldn't download the avatar because the given user has no
        # posts, we'll need to make a separate request for the user's page
        # and download the avatar that way.
        if not downloaded_avatar:
            user_name = self.avatar()
            profile_url = "https://www.tiktok.com/@{}".format(user_name)
            data = self._extract_rehydration_data(profile_url)
            data = data["webapp.user-detail"]["userInfo"]["user"]
            data["user"] = user_name
            avatar_url = data["avatarLarger"]
            avatar = self._generate_avatar(
                avatar_url, data, user_name, data["id"])
            yield Message.Directory, avatar
            yield Message.Url, avatar_url, avatar

    def _generate_avatar(self, avatar_url, data, user_name, user_id):
        """Return a metadata dict describing a user's avatar image

        Works on a copy of 'data', so the caller's dict is not modified.
        'img_id' is the part of the filename before the first '~'.
        """
        avatar = text.nameext_from_url(avatar_url, data.copy())
        avatar.update({
            "type"  : "avatar",
            "title" : "@" + user_name,
            "id"    : user_id,
            "img_id": avatar["filename"].partition("~")[0],
            "num"   : 0,
        })
        return avatar

    def _sanitize_url(self, url):
        """Turn the first '/photo/' into '/video/' and ensure a scheme"""
        return text.ensure_http_scheme(url.replace("/photo/", "/video/", 1))

    def _extract_rehydration_data(self, url):
        """Fetch 'url' and return its embedded '__DEFAULT_SCOPE__' data

        The data is the JSON content of the page's
        '__UNIVERSAL_DATA_FOR_REHYDRATION__' script element.
        """
        html = self.request(url).text
        data = text.extr(
            html, '<script id="__UNIVERSAL_DATA_FOR_REHYDRATION__" '
            'type="application/json">', '</script>')
        return util.json_loads(data)["__DEFAULT_SCOPE__"]

    def _check_status_code(self, detail, url):
        """Return True if 'detail' reports no error, log and return False
        otherwise

        A missing or falsy 'statusCode' counts as success.
        """
        status = detail.get("statusCode")
        if not status:
            return True

        if status == 10222:
            self.log.error("%s: Login required to access this post", url)
        elif status == 10204:
            self.log.error("%s: Requested post not available", url)
        elif status == 10231:
            # Adjacent string literals are concatenated verbatim, so the
            # first one needs the trailing space to keep "a VPN" apart.
            self.log.error("%s: Region locked - Try downloading with a "
                           "VPN/proxy connection", url)
        else:
            self.log.error(
                "%s: Received unknown error code %s ('%s')",
                url, status, detail.get("statusMsg") or "")
        return False
|
||
|
||
class TiktokPostExtractor(TiktokExtractor):
    """Extractor for a single TikTok video or photo post"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/(?:@([\w_.-]*)|share)/(?:phot|vide)o/(\d+)"
    example = "https://www.tiktok.com/@USER/photo/1234567890"

    def urls(self):
        """Return a 1-tuple holding the post URL rebuilt in /video/ form"""
        name, ident = self.groups
        # 'name' is empty or None for /share/ links; use "" in that case.
        post_url = "{}/@{}/video/{}".format(self.root, name or "", ident)
        return (post_url,)
|
||
|
||
class TiktokVmpostExtractor(TiktokExtractor):
    """Extractor for shortened TikTok links (vm./vt. hosts and /t/ paths)"""
    subcategory = "vmpost"
    pattern = (r"(?:https?://)?(?:"
               r"(?:v[mt]\.)?tiktok\.com|(?:www\.)?tiktok\.com/t"
               r")/(?!@)([^/?#]+)")
    example = "https://vm.tiktok.com/1a2B3c4E5"

    def items(self):
        """Resolve the short link via a HEAD request and queue its target"""
        response = self.request(
            text.ensure_http_scheme(self.url),
            headers={"User-Agent": "facebookexternalhit/1.1"},
            method="HEAD",
            allow_redirects=False,
            notfound="post",
        )

        target = response.headers.get("Location")
        # A missing redirect, or one no longer than
        # "https://www.tiktok.com/?_r=1" (28 characters),
        # is treated as a missing post.
        if not target or len(target) <= 28:
            raise exception.NotFoundError("post")

        # Drop the query string and delegate to the regular post extractor.
        yield Message.Queue, target.partition("?")[0], {
            "_extractor": TiktokPostExtractor}
|
||
|
||
class TiktokUserExtractor(TiktokExtractor):
    """Extractor for the posts of a TikTok user profile"""
    subcategory = "user"
    pattern = BASE_PATTERN + r"/@([\w_.-]+)/?(?:$|\?|#)"
    example = "https://www.tiktok.com/@USER"

    def urls(self):
        """Return the post URLs that yt-dlp/youtube-dl lists for the
        user's page

        Raises exception.ExtractionError when the ytdl module cannot be
        imported.
        """
        try:
            module = ytdl.import_module(self.config("module"))
        except (ImportError, SyntaxError) as exc:
            self.log.error("Cannot import module '%s'",
                           getattr(exc, "name", ""))
            self.log.debug("", exc_info=exc)
            raise exception.ExtractionError("yt-dlp or youtube-dl is required "
                                            "for this feature!")

        # Options fixed by this extractor
        extr_opts = {"extract_flat": True, "ignore_no_formats_error": True}
        # Options derived from the extractor's own settings
        user_opts = {
            "retries": self._retries,
            "socket_timeout": self._timeout,
            "nocheckcertificate": not self._verify,
            "playlist_items": str(self.config("tiktok-range", "")),
        }
        if self._proxies:
            user_opts["proxy"] = self._proxies.get("http")

        ytdl_instance = ytdl.construct_YoutubeDL(
            module, self, user_opts, extr_opts)

        # Hand this extractor's cookies over to ytdl
        if self.cookies:
            add_cookie = ytdl_instance.cookiejar.set_cookie
            for entry in self.cookies:
                add_cookie(entry)

        with ytdl_instance as ydl:
            profile_url = "{}/@{}".format(self.root, self.groups[0])
            info_extractor = ydl.get_info_extractor("TikTokUser")
            info = ydl._YoutubeDL__extract_info(
                profile_url, info_extractor, False, {}, True)
            # This should include video and photo posts in /video/ URL form.
            return [entry["url"] for entry in info["entries"]]

    def avatar(self):
        """Return the user name captured from the profile URL"""
        return self.groups[0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.