~cadence/tube-devel

This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch
1

[PATCH] Fix regular captions

Details
Message ID
<20211120074033.9396-1-bopol@e.email>
DKIM signature
missing
Download raw message
Patch: +1 -48
---
 extractors/captions.py | 49 +-----------------------------------------
 1 file changed, 1 insertion(+), 48 deletions(-)

diff --git a/extractors/captions.py b/extractors/captions.py
index 418ad4f..d05ec4e 100644
--- a/extractors/captions.py
+++ b/extractors/captions.py
@@ -6,10 +6,7 @@ from urllib.parse import urlencode
 import xml.etree.ElementTree as ET

 def extract_captions(id, **kwargs):
-	if "label" in kwargs and "auto-generated" in kwargs["label"]:
-		captions = extract_captions_from_video(id)
-	else:
-		captions = extract_captions_from_api(id)
+	captions = extract_captions_from_video(id)
 	return extract_captions_from_dict(captions, **kwargs)

 # Return captions for the language specified,
@@ -26,50 +23,6 @@ def extract_captions_from_dict(captions, *, lang=None, label=None):
 			return re.sub(r"^([0-9:.]+ --> [0-9:.]+).*$", r"\1", r.content.decode("utf8"), flags=re.MULTILINE)
 		return r

-# List of captions directly from youtube, but no automatic
-def extract_captions_from_api(id):
-	url = "https://video.google.com/timedtext?hl=en&type=list&v={}".format(id)
-	with requests.get(url) as r:
-		if r.status_code == 404:
-			return {
-				"error": "Video unavailable",
-				"identifier": "NOT_FOUND"
-			}
-
-		r.raise_for_status()
-
-		transcript = ET.fromstring(r.content.decode("utf8"))
-		tracks = transcript.findall("track")
-
-		captions = []
-		result = {
-			"captions": captions
-		}
-
-		for track in tracks:
-			language_code = track.attrib["lang_code"]
-			label = track.get("name", default=language_code)
-			subtitle_api_url = get_subtitle_api_url(id, label, language_code)
-
-			params = urlencode({
-				"lang": language_code,
-				"v": id,
-				"fmt": "vtt",
-				"name": label
-			})
-
-			subtitle_url = "https://www.youtube.com/api/timedtext?" + params
-
-			captions.append({
-				"label": label if label != "" else language_code,
-				"languageCode": language_code,
-				"url": subtitle_api_url,
-				"second__remoteUrl": subtitle_url
-			})
-
-		return result
-
-# We'll fall back to this function for auto-captions.
 def extract_captions_from_video(id):
 	return {
 		"captions": extract_video(id)["captions"]
-- 
2.31.1
Details
Message ID
<20211216122813.fb552f33c01792be5e5a1890@disroot.org>
In-Reply-To
<20211120074033.9396-1-bopol@e.email> (view parent)
DKIM signature
missing
Download raw message
I wasn't sure about this at first because I was hesitant to lose all
this code, but I do now think it is the best option. And if we need
this code again in the future, it will always be in the git history.

Thank you!
Reply to thread Export thread (mbox)