# Reconstructed from yt-dlp commit faf7863bb0898c4a7972cd77b12a619bbc79c914,
# "[extractor/Smotrim] Add extractor (#5015)", authored by nikita-moor and
# Lesmiscore (Sat, 24 Sep 2022 18:30:31 +0900).
#
# That commit creates this module as yt_dlp/extractor/smotrim.py and registers
# it in yt_dlp/extractor/_extractors.py (between the slutload and snotr
# imports) with:
#
#     from .smotrim import SmotrimIE
from .common import InfoExtractor
from ..utils import ExtractorError


class SmotrimIE(InfoExtractor):
    """Resolve smotrim.ru page URLs to the matching player iframe URL.

    ``/video/<id>`` and ``/live/<id>`` URLs carry the player id directly.
    ``/brand/<id>`` and ``/article/<id>`` pages are downloaded and searched
    for an embedded ``player.smotrim.ru`` video link. In every case the
    result is a ``player.vgtrk.com/iframe/...`` URL handed back through
    ``url_result``; the tests below expect the RUTV extractor to pick it up.
    """

    # The group names `type` and `id` are read by `_real_extract`.
    _VALID_URL = r'https?://smotrim\.ru/(?P<type>brand|video|article|live)/(?P<id>[0-9]+)'
    _TESTS = [{  # video
        'url': 'https://smotrim.ru/video/1539617',
        'md5': 'b1923a533c8cab09679789d720d0b1c5',
        'info_dict': {
            'id': '1539617',
            'ext': 'mp4',
            'title': 'Полиглот. Китайский с нуля за 16 часов! Урок №16',
            'description': '',
        },
        'add_ie': ['RUTV'],
    }, {  # article (geo-restricted? plays fine from the US and JP)
        'url': 'https://smotrim.ru/article/2813445',
        'md5': 'e0ac453952afbc6a2742e850b4dc8e77',
        'info_dict': {
            'id': '2431846',
            'ext': 'mp4',
            'title': 'Новости культуры. Съёмки первой программы "Большие и маленькие"',
            'description': 'md5:94a4a22472da4252bf5587a4ee441b99',
        },
        'add_ie': ['RUTV'],
    }, {  # brand, redirect
        'url': 'https://smotrim.ru/brand/64356',
        'md5': '740472999ccff81d7f6df79cecd91c18',
        'info_dict': {
            'id': '2354523',
            'ext': 'mp4',
            'title': 'Большие и маленькие. Лучшее. 4-й выпуск',
            'description': 'md5:84089e834429008371ea41ea3507b989',
        },
        'add_ie': ['RUTV'],
    }, {  # live
        'url': 'https://smotrim.ru/live/19201',
        'info_dict': {
            'id': '19201',
            'ext': 'mp4',
            # this looks like a TV channel name
            'title': 'Россия Культура. Прямой эфир',
            'description': '',
        },
        'add_ie': ['RUTV'],
    }]

    def _real_extract(self, url):
        """Return a ``url_result`` pointing at the player iframe for *url*.

        Raises:
            ExtractorError: (expected) when a brand/article page contains no
                embedded ``player.smotrim.ru`` video link.
        """
        video_id, typ = self._match_valid_url(url).group('id', 'type')
        rutv_type = 'video'
        if typ not in ('video', 'live'):
            # brand/article ids are page ids, not player ids: fetch the page
            # and pull the real video id out of the embedded player URL.
            webpage = self._download_webpage(url, video_id, f'Resolving {typ} link')
            # there are two cases matching regex:
            #   1. "embedUrl" in JSON LD (/brand/)
            #   2. "src" attribute from iframe (/article/)
            video_id = self._search_regex(
                r'"https://player\.smotrim\.ru/iframe/video/id/(?P<video_id>\d+)/',
                webpage, 'video_id', default=None)
            if not video_id:
                raise ExtractorError('There are no video in this page.', expected=True)
        elif typ == 'live':
            rutv_type = 'live'

        return self.url_result(f'https://player.vgtrk.com/iframe/{rutv_type}/id/{video_id}')