~cadence/tube-devel

Fix regular captions v1 APPLIED

bopol: 1
 Fix regular captions

 1 file changed, 1 insertion(+), 48 deletions(-)
Export patchset (mbox)
How do I use this?

Copy & paste the following snippet into your terminal to import this patchset into git:

curl -s https://lists.sr.ht/~cadence/tube-devel/patches/26702/mbox | git am -3
Learn more about email & git

[PATCH] Fix regular captions Export this patch

---
 extractors/captions.py | 49 +-----------------------------------------
 1 file changed, 1 insertion(+), 48 deletions(-)

diff --git a/extractors/captions.py b/extractors/captions.py
index 418ad4f..d05ec4e 100644
--- a/extractors/captions.py
+++ b/extractors/captions.py
@@ -6,10 +6,7 @@ from urllib.parse import urlencode
import xml.etree.ElementTree as ET

def extract_captions(id, **kwargs):
	# Look up the caption list for the video `id` and pass it, together with
	# the caller's keyword arguments, to extract_captions_from_dict (whose
	# keyword-only parameters are `lang` and `label`).
	#
	# NOTE(review): this span comes from a diff hunk whose +/- markers are
	# not visible. The patch stats (1 insertion, 48 deletions) suggest the
	# if/else below is the removed code and the unconditional assignment
	# after it is the single added line -- confirm against the raw patch.
	# Read literally as one function, the if/else result is always
	# overwritten by the assignment that follows it.
	if "label" in kwargs and "auto-generated" in kwargs["label"]:
		captions = extract_captions_from_video(id)
	else:
		captions = extract_captions_from_api(id)
	# Caption list taken from the video extractor regardless of the label.
	captions = extract_captions_from_video(id)
	return extract_captions_from_dict(captions, **kwargs)

# Return captions for the language specified,
@@ -26,50 +23,6 @@ def extract_captions_from_dict(captions, *, lang=None, label=None):
			return re.sub(r"^([0-9:.]+ --> [0-9:.]+).*$", r"\1", r.content.decode("utf8"), flags=re.MULTILINE)
		return r

# List the caption tracks reported by the timedtext "list" endpoint for a
# video. Per the original note, this list does not include automatic captions.
#
# Returns {"captions": [...]} with one entry per <track> element, or an
# error dict with identifier "NOT_FOUND" when the endpoint answers 404.
# Any other HTTP error status is raised by raise_for_status().
def extract_captions_from_api(id):
	url = "https://video.google.com/timedtext?hl=en&type=list&v={}".format(id)
	with requests.get(url) as r:
		# A 404 is reported to the caller as data rather than as an exception.
		if r.status_code == 404:
			return {
				"error": "Video unavailable",
				"identifier": "NOT_FOUND"
			}

		r.raise_for_status()

		# The response body is parsed as XML; each caption track is a
		# <track> child of the root element.
		transcript = ET.fromstring(r.content.decode("utf8"))
		tracks = transcript.findall("track")

		# `result` holds a reference to `captions`, so the appends in the
		# loop below are visible through `result`.
		captions = []
		result = {
			"captions": captions
		}

		for track in tracks:
			language_code = track.attrib["lang_code"]
			# Falls back to the language code only when the "name" attribute
			# is absent; a present-but-empty name stays "" here.
			label = track.get("name", default=language_code)
			# get_subtitle_api_url is defined outside this hunk.
			# NOTE(review): presumably builds this project's own caption URL -- confirm.
			subtitle_api_url = get_subtitle_api_url(id, label, language_code)

			params = urlencode({
				"lang": language_code,
				"v": id,
				"fmt": "vtt",
				"name": label
			})

			subtitle_url = "https://www.youtube.com/api/timedtext?" + params

			captions.append({
				# An empty label is replaced by the language code for display only;
				# the URLs above were already built with the unmodified label.
				"label": label if label != "" else language_code,
				"languageCode": language_code,
				"url": subtitle_api_url,
				"second__remoteUrl": subtitle_url
			})

		return result

# We'll fall back to this function for auto-captions.
def extract_captions_from_video(id):
	return {
		"captions": extract_video(id)["captions"]
-- 
2.31.1
I wasn't sure about this at first because I was hesitant to lose all
this code, but I do now think it is the best option. And if we need
this code again in the future, it will always be in the git history.

Thank you!