This thread contains a patchset. You're looking at the original emails, but you may wish to use the patch review UI. Review patch

[PATCH] Fix regular captions

Message ID
DKIM signature
Download raw message
Patch: +1 -48
 extractors/captions.py | 49 +-----------------------------------------
 1 file changed, 1 insertion(+), 48 deletions(-)

diff --git a/extractors/captions.py b/extractors/captions.py
index 418ad4f..d05ec4e 100644
--- a/extractors/captions.py
+++ b/extractors/captions.py
@@ -6,10 +6,7 @@ from urllib.parse import urlencode
import xml.etree.ElementTree as ET

def extract_captions(id, **kwargs):
	if "label" in kwargs and "auto-generated" in kwargs["label"]:
		captions = extract_captions_from_video(id)
		captions = extract_captions_from_api(id)
	captions = extract_captions_from_video(id)
	return extract_captions_from_dict(captions, **kwargs)

# Return captions for the language specified,
@@ -26,50 +23,6 @@ def extract_captions_from_dict(captions, *, lang=None, label=None):
			return re.sub(r"^([0-9:.]+ --> [0-9:.]+).*$", r"\1", r.content.decode("utf8"), flags=re.MULTILINE)
		return r

# List of captions directly from youtube, but no automatic
def extract_captions_from_api(id):
	url = "https://video.google.com/timedtext?hl=en&type=list&v={}".format(id)
	with requests.get(url) as r:
		if r.status_code == 404:
			return {
				"error": "Video unavailable",
				"identifier": "NOT_FOUND"


		transcript = ET.fromstring(r.content.decode("utf8"))
		tracks = transcript.findall("track")

		captions = []
		result = {
			"captions": captions

		for track in tracks:
			language_code = track.attrib["lang_code"]
			label = track.get("name", default=language_code)
			subtitle_api_url = get_subtitle_api_url(id, label, language_code)

			params = urlencode({
				"lang": language_code,
				"v": id,
				"fmt": "vtt",
				"name": label

			subtitle_url = "https://www.youtube.com/api/timedtext?" + params

				"label": label if label != "" else language_code,
				"languageCode": language_code,
				"url": subtitle_api_url,
				"second__remoteUrl": subtitle_url

		return result

# We'll fall back to this function for auto-captions.
def extract_captions_from_video(id):
	return {
		"captions": extract_video(id)["captions"]
Message ID
<20211120074033.9396-1-bopol@e.email> (view parent)
DKIM signature
Download raw message
I wasn't sure about this at first because I was hesitant to lose all
this code, but I do now think it is the best option. And if we need
this code again in the future, it will always be in the git history.

Thank you!
Reply to thread Export thread (mbox)