---
extractors/captions.py | 49 +-----------------------------------------
1 file changed, 1 insertion(+), 48 deletions(-)
diff --git a/extractors/captions.py b/extractors/captions.py
index 418ad4f..d05ec4e 100644
--- a/extractors/captions.py
@@ -6,10 +6,7 @@ from urllib.parse import urlencode
import xml.etree.ElementTree as ET
def extract_captions(id, **kwargs):
- if "label" in kwargs and "auto-generated" in kwargs["label"]:
- captions = extract_captions_from_video(id)
- else:
- captions = extract_captions_from_api(id)
+ captions = extract_captions_from_video(id)
return extract_captions_from_dict(captions, **kwargs)
# Return captions for the language specified,
@@ -26,50 +23,6 @@ def extract_captions_from_dict(captions, *, lang=None, label=None):
return re.sub(r"^([0-9:.]+ --> [0-9:.]+).*$", r"\1", r.content.decode("utf8"), flags=re.MULTILINE)
return r
-# List of captions directly from youtube, but no automatic
-def extract_captions_from_api(id):
- url = "https://video.google.com/timedtext?hl=en&type=list&v={}".format(id)
- with requests.get(url) as r:
- if r.status_code == 404:
- return {
- "error": "Video unavailable",
- "identifier": "NOT_FOUND"
- }
-
- r.raise_for_status()
-
- transcript = ET.fromstring(r.content.decode("utf8"))
- tracks = transcript.findall("track")
-
- captions = []
- result = {
- "captions": captions
- }
-
- for track in tracks:
- language_code = track.attrib["lang_code"]
- label = track.get("name", default=language_code)
- subtitle_api_url = get_subtitle_api_url(id, label, language_code)
-
- params = urlencode({
- "lang": language_code,
- "v": id,
- "fmt": "vtt",
- "name": label
- })
-
- subtitle_url = "https://www.youtube.com/api/timedtext?" + params
-
- captions.append({
- "label": label if label != "" else language_code,
- "languageCode": language_code,
- "url": subtitle_api_url,
- "second__remoteUrl": subtitle_url
- })
-
- return result
-
-# We'll fall back to this function for auto-captions.
def extract_captions_from_video(id):
return {
"captions": extract_video(id)["captions"]
--
2.31.1
I wasn't sure about this at first because I was hesitant to lose all
this code, but I now think it is the best option. If we ever need
this code again, it will always be in the git history.
Thank you!