From 6839d02cb666bd9f8ed6e9a97337c02ce1b19368 Mon Sep 17 00:00:00 2001
From: Paul Wise
Date: Mon, 20 Dec 2021 10:48:41 +0800
Subject: [PATCH] [ABC:iview] Add show extractor (#1630)

Authored by: pabs3
---
Review notes (free-form area after the separator; not part of the commit):

  * ABCIViewShowSeriesIE matches https://iview.abc.net.au/show/<id>,
    optionally followed by /series/<number>, and nothing after that.
  * _real_extract downloads the show page, captures the quoted string
    assigned to window.__INITIAL_STATE__, HTML-unescapes it, decodes
    backslash escapes, parses the result as JSON and reads
    route -> pageData -> _embedded from it.
  * When the 'noplaylist' param is set and the page data contains
    'highlightVideo', the highlight's shareUrl is handed to ABCIViewIE.
    Otherwise a playlist is built from the shareUrl of every entry in
    selectedSeries -> _embedded -> videoEpisodes.
  * 'title' and 'season' are taken from the same keys
    ('title', then 'displaySubtitle'), so they carry the same value.
  * NOTE(review): .encode('utf-8').decode('unicode_escape') interprets
    non-escape bytes as Latin-1, so any non-ASCII character present in
    the state string would be altered -- confirm the payload is
    ASCII-only or escapes such characters.
  * NOTE(review): both _TESTS entries use the same URL; the second one
    relies on 'noplaylist' to exercise the highlight-video branch.

 yt_dlp/extractor/abc.py        | 64 ++++++++++++++++++++++++++++++++++
 yt_dlp/extractor/extractors.py |  1 +
 2 files changed, 65 insertions(+)

diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py
index e3369306c5..354453a274 100644
--- a/yt_dlp/extractor/abc.py
+++ b/yt_dlp/extractor/abc.py
@@ -8,6 +8,7 @@ import time
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
+    dict_get,
     ExtractorError,
     js_to_json,
     int_or_none,
@@ -253,3 +254,66 @@ class ABCIViewIE(InfoExtractor):
             'subtitles': subtitles,
             'is_live': is_live,
         }
+
+
+class ABCIViewShowSeriesIE(InfoExtractor):
+    IE_NAME = 'abc.net.au:iview:showseries'
+    _VALID_URL = r'https?://iview\.abc\.net\.au/show/(?P<id>[^/]+)(?:/series/\d+)?$'
+    _GEO_COUNTRIES = ['AU']
+
+    _TESTS = [{
+        'url': 'https://iview.abc.net.au/show/upper-middle-bogan',
+        'info_dict': {
+            'id': '124870-1',
+            'title': 'Series 1',
+            'description': 'md5:93119346c24a7c322d446d8eece430ff',
+            'series': 'Upper Middle Bogan',
+            'season': 'Series 1',
+            'thumbnail': r're:^https?://cdn\.iview\.abc\.net\.au/thumbs/.*\.jpg$'
+        },
+        'playlist_count': 8,
+    }, {
+        'url': 'https://iview.abc.net.au/show/upper-middle-bogan',
+        'info_dict': {
+            'id': 'CO1108V001S00',
+            'ext': 'mp4',
+            'title': 'Series 1 Ep 1 I\'m A Swan',
+            'description': 'md5:7b676758c1de11a30b79b4d301e8da93',
+            'series': 'Upper Middle Bogan',
+            'uploader_id': 'abc1',
+            'upload_date': '20210630',
+            'timestamp': 1625036400,
+        },
+        'params': {
+            'noplaylist': True,
+            'skip_download': 'm3u8',
+        },
+    }]
+
+    def _real_extract(self, url):
+        show_id = self._match_id(url)
+        webpage = self._download_webpage(url, show_id)
+        webpage_data = self._search_regex(
+            r'window\.__INITIAL_STATE__\s*=\s*[\'"](.+?)[\'"]\s*;',
+            webpage, 'initial state')
+        video_data = self._parse_json(
+            unescapeHTML(webpage_data).encode('utf-8').decode('unicode_escape'), show_id)
+        video_data = video_data['route']['pageData']['_embedded']
+
+        if self.get_param('noplaylist') and 'highlightVideo' in video_data:
+            self.to_screen('Downloading just the highlight video because of --no-playlist')
+            return self.url_result(video_data['highlightVideo']['shareUrl'], ie=ABCIViewIE.ie_key())
+
+        self.to_screen(f'Downloading playlist {show_id} - add --no-playlist to just download the highlight video')
+        series = video_data['selectedSeries']
+        return {
+            '_type': 'playlist',
+            'entries': [self.url_result(episode['shareUrl'])
+                        for episode in series['_embedded']['videoEpisodes']],
+            'id': series.get('id'),
+            'title': dict_get(series, ('title', 'displaySubtitle')),
+            'description': series.get('description'),
+            'series': dict_get(series, ('showTitle', 'displayTitle')),
+            'season': dict_get(series, ('title', 'displaySubtitle')),
+            'thumbnail': series.get('thumbnail'),
+        }
diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 2584260507..da6f5d00fd 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 from .abc import (
     ABCIE,
     ABCIViewIE,
+    ABCIViewShowSeriesIE,
 )
 from .abcnews import (
     AbcNewsIE,