From 68a9a450d432f67dc8c2531f053a5fd41b5f341a Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Fri, 4 Nov 2022 15:37:45 +0000 Subject: [PATCH] [extractor/genius] Add extractors (#5221) Closes #5209 Authored by: bashonly --- yt_dlp/extractor/_extractors.py | 4 + yt_dlp/extractor/genius.py | 127 ++++++++++++++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 yt_dlp/extractor/genius.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0bcb6e185..020f3b454 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -636,6 +636,10 @@ from .gazeta import GazetaIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE +from .genius import ( + GeniusIE, + GeniusLyricsIE, +) from .gettr import ( GettrIE, GettrStreamingIE, diff --git a/yt_dlp/extractor/genius.py b/yt_dlp/extractor/genius.py new file mode 100644 index 000000000..62f5a28ff --- /dev/null +++ b/yt_dlp/extractor/genius.py @@ -0,0 +1,127 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + js_to_json, + smuggle_url, + str_or_none, + traverse_obj, + unescapeHTML, +) + + +class GeniusIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?genius\.com/videos/(?P<id>[^?/#]+)' + _TESTS = [{ + 'url': 'https://genius.com/videos/Vince-staples-breaks-down-the-meaning-of-when-sparks-fly', + 'md5': '64c2ad98cfafcfda23bfa0ad0c512f4c', + 'info_dict': { + 'id': '6313303597112', + 'ext': 'mp4', + 'title': 'Vince Staples Breaks Down The Meaning Of “When Sparks Fly”', + 'description': 'md5:bc15e00342c537c0039d414423ae5752', + 'tags': 'count:1', + 'uploader_id': '4863540648001', + 'duration': 388.416, + 'upload_date': '20221005', + 'timestamp': 1664982341, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }, { + 'url': 'https://genius.com/videos/Breaking-down-drakes-certified-lover-boy-kanye-beef-way-2-sexy-cudi', + 'md5': 
'b8ed87a5efd1473bd027c20a969d4060', + 'info_dict': { + 'id': '6271792014001', + 'ext': 'mp4', + 'title': 'md5:c6355f7fa8a70bc86492a3963919fc15', + 'description': 'md5:1774638c31548b31b037c09e9b821393', + 'tags': 'count:3', + 'uploader_id': '4863540648001', + 'duration': 2685.099, + 'upload_date': '20210909', + 'timestamp': 1631209167, + 'thumbnail': r're:^https?://.*\.jpg$', + }, + }] + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + + metadata = self._search_json( + r'