~lomanic: 2
Stream responses on /vi and /ggpht endpoints
Fix #18 proxy videos through new /videoplayback endpoint
4 files changed, 29 insertions(+), 5 deletions(-)
Copy & paste the following snippet into your terminal to import this patchset into git:
curl -s https://lists.sr.ht/~cadence/tube-devel/patches/26397/mbox | git am -3
Learn more about email & git
From: Lomanic <lomanic@hotmail.fr> The chunk_size=None parameter to iter_content lets us consume data as soon as it arrives https://docs.python-requests.org/en/master/api/#requests.Response.iter_content --- index.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/index.py b/index.py index ffc35c7..91cdcfd 100644 --- a/index.py +++ b/index.py @@ -123,17 +123,17 @@ class NewLeaf(object): @cherrypy.expose def vi(self, id, file): - with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file)) as r: + with requests.get("https://i.ytimg.com/vi/{}/{}".format(id, file), stream=True) as r: r.raise_for_status() cherrypy.response.headers["content-type"] = r.headers["content-type"] - return r # no idea if this is a good way to do it, but it definitely works! :D + return next(r.iter_content(chunk_size=None)) @cherrypy.expose def ggpht(self, *path): - with requests.get("https://yt3.ggpht.com/{}".format("/".join(path))) as r: + with requests.get("https://yt3.ggpht.com/{}".format("/".join(path)), stream=True) as r: r.raise_for_status() cherrypy.response.headers["content-type"] = r.headers["content-type"] - return r + return next(r.iter_content(chunk_size=None)) bind_port = getattr(configuration, "bind_port", 3000) bind_host = getattr(configuration, "bind_host", "0.0.0.0") -- 2.32.0
From: Lomanic <lomanic@hotmail.fr> --- configuration.sample.py | 3 +++ extractors/video.py | 5 ++++- index.py | 18 ++++++++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/configuration.sample.py b/configuration.sample.py index b3a1ad9..1277fa8 100644 --- a/configuration.sample.py +++ b/configuration.sample.py @@ -15,3 +15,6 @@ website_origin = "http://example.com:3000" # The port to bind to. #bind_port = 3000 + +# Tell NewLeaf to proxy video streams +# proxy_videos = False diff --git a/extractors/video.py b/extractors/video.py index fe23de7..353c60c 100644 --- a/extractors/video.py +++ b/extractors/video.py @@ -14,6 +14,7 @@ from urllib.parse import parse_qs, urlparse, urlencode from cachetools import TTLCache video_cache = TTLCache(maxsize=50, ttl=300) +proxy_videos = getattr(configuration, "proxy_videos", False) ytdl_opts = { "quiet": True, @@ -127,6 +128,8 @@ def extract_video(id): url = format["fragment_base_url"] else: # just a normal media file url = format["url"] + if proxy_videos: + url = "{}/videoplayback?{}".format(configuration.website_origin, urlencode({"url": url})) result["adaptiveFormats"].append({ "index": None, "bitrate": str(int(format["tbr"]*1000)), @@ -151,7 +154,7 @@ def extract_video(id): }) else: # format is not adaptive result["formatStreams"].append({ - "url": format["url"], + "url": "{}/videoplayback?{}".format(configuration.website_origin, urlencode({"url": format["url"]})) if proxy_videos else format["url"], "itag": format["format_id"], "type": result_type, "second__mime": mime, diff --git a/index.py b/index.py index 91cdcfd..eba1635 100644 --- a/index.py +++ b/index.py @@ -3,6 +3,7 @@ import json import pathlib import requests import yt_dlp +from urllib.parse import urlparse from extractors.video import extract_video from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest from extractors.manifest import extract_manifest @@ -135,6 +136,23 @@ class NewLeaf(object): 
cherrypy.response.headers["content-type"] = r.headers["content-type"] return next(r.iter_content(chunk_size=None)) + @cherrypy.expose + def videoplayback(self, url): + proxy_videos = getattr(configuration, "proxy_videos", False) + if not proxy_videos: + raise cherrypy.HTTPError(401, "Unauthorized") + parts = urlparse(url) + if not (parts.hostname.endswith(".googlevideo.com") and parts.path == "/videoplayback"): + raise cherrypy.HTTPError(401, "Unauthorized") + headers = {k: v for k, v in cherrypy.request.headers.items() if k.lower() not in ["host", "remote-addr"] and not k.lower().startswith("x-")} + with requests.get(url, headers=headers, stream=True) as r: + r.raise_for_status() + for k, v in r.headers.items(): + cherrypy.response.headers[k] = v + cherrypy.response.status = r.status_code + # return 5MB at a time, the browser will ask for the next chunk thanks to HTTP 206 Partial Content streaming magic + return next(r.iter_content(chunk_size=5*1024*1024)) + bind_port = getattr(configuration, "bind_port", 3000) bind_host = getattr(configuration, "bind_host", "0.0.0.0") server_root = pathlib.Path(__file__).parent.joinpath("root") -- 2.32.0