diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py
index 545cbe2049..01281b5a15 100644
--- a/yt_dlp/extractor/_extractors.py
+++ b/yt_dlp/extractor/_extractors.py
@@ -1295,6 +1295,7 @@ from .nrl import NRLTVIE
 from .ntvcojp import NTVCoJpCUIE
 from .ntvde import NTVDeIE
 from .ntvru import NTVRuIE
+from .nubilesporn import NubilesPornIE
 from .nytimes import (
     NYTimesIE,
     NYTimesArticleIE,
diff --git a/yt_dlp/extractor/nubilesporn.py b/yt_dlp/extractor/nubilesporn.py
new file mode 100644
index 0000000000..d4f1d9d67a
--- /dev/null
+++ b/yt_dlp/extractor/nubilesporn.py
@@ -0,0 +1,112 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    float_or_none,
+    format_field,
+    get_element_by_class,
+    get_element_by_id,
+    get_element_html_by_class,
+    get_elements_by_class,
+    int_or_none,
+    try_call,
+    unified_timestamp,
+    urlencode_postdata,
+)
+
+
+class NubilesPornIE(InfoExtractor):
+    """Extractor for watch pages in the members.nubiles-porn.com members area."""
+
+    _NETRC_MACHINE = 'nubiles-porn'
+    # The slug after the numeric id is optional; when present it must end in
+    # "-s<season>e<episode>", which is where season/episode numbers come from.
+    _VALID_URL = r'''(?x)
+        https://members\.nubiles-porn\.com/video/watch/(?P<id>\d+)
+        (?:/(?P<display_id>[\w\-]+-s(?P<season>\d+)e(?P<episode>\d+)))?
+    '''
+
+    _TESTS = [{
+        'url': 'https://members.nubiles-porn.com/video/watch/165320/trying-to-focus-my-one-track-mind-s3e1',
+        'md5': 'fa7f09da8027c35e4bdf0f94f55eac82',
+        'info_dict': {
+            'id': '165320',
+            'title': 'Trying To Focus My One Track Mind - S3:E1',
+            'ext': 'mp4',
+            'display_id': 'trying-to-focus-my-one-track-mind-s3e1',
+            'thumbnail': 'https://images.nubiles-porn.com/videos/trying_to_focus_my_one_track_mind/samples/cover1280.jpg',
+            'description': 'md5:81f3d4372e0e39bff5c801da277a5141',
+            'timestamp': 1676160000,
+            'upload_date': '20230212',
+            'channel': 'Younger Mommy',
+            'channel_id': '64',
+            'channel_url': 'https://members.nubiles-porn.com/video/website/64',
+            'like_count': int,
+            'average_rating': float,
+            'age_limit': 18,
+            'categories': ['Big Boobs', 'Big Naturals', 'Blowjob', 'Brunette', 'Cowgirl', 'Girl Orgasm', 'Girl-Boy',
+                           'Glasses', 'Hardcore', 'Milf', 'Shaved Pussy', 'Tattoos', 'YoungerMommy.com'],
+            'tags': list,
+            'cast': ['Kenzie Love'],
+            'availability': 'needs_auth',
+            'series': 'Younger Mommy',
+            'series_id': '64',
+            'season': 'Season 3',
+            'season_number': 3,
+            'episode': 'Episode 1',
+            'episode_number': 1,
+        },
+    }]
+
+    def _perform_login(self, username, password):
+        """Log in by submitting the login form's hidden inputs plus the credentials."""
+        login_webpage = self._download_webpage('https://nubiles-porn.com/login', video_id=None)
+        inputs = self._hidden_inputs(login_webpage)
+        inputs.update({'username': username, 'password': password})
+        self._request_webpage(
+            'https://nubiles-porn.com/authentication/login', None, data=urlencode_postdata(inputs))
+
+    def _real_extract(self, url):
+        """Build the info dict for one watch page from its URL and HTML."""
+        url_match = self._match_valid_url(url)
+        video_id = url_match.group('id')
+        page = self._download_webpage(url, video_id)
+
+        # Avoid a TypeError/IndexError when the player wrapper is missing or
+        # holds no media tags; an empty format list is returned instead.
+        player_html = get_element_by_class('watch-page-video-wrapper', page)
+        media_entries = self._parse_html5_media_entries(url, player_html, video_id) if player_html else []
+        media_entry = media_entries[0] if media_entries else {}
+
+        channel_id, channel_name = self._search_regex(
+            r'/video/website/(?P<id>\d+).+>(?P<name>\w+)\.com', get_element_html_by_class('site-link', page),
+            'channel', fatal=False, group=('id', 'name')) or (None, None)
+        if channel_name:
+            # Split the CamelCase site name into words, e.g. "YoungerMommy" -> "Younger Mommy".
+            channel_name = re.sub(r'([^A-Z]+)([A-Z]+)', r'\1 \2', channel_name)
+
+        return {
+            'id': video_id,
+            # NOTE(review): the <h2> tag name is reconstructed - confirm against the live page.
+            'title': self._search_regex(r'<h2>([^<]+)</h2>', page, 'title', fatal=False),
+            'formats': media_entry.get('formats') or [],
+            'display_id': url_match.group('display_id'),
+            'thumbnail': media_entry.get('thumbnail'),
+            'description': clean_html(get_element_html_by_class('content-pane-description', page)),
+            'timestamp': unified_timestamp(get_element_by_class('date', page)),
+            'channel': channel_name,
+            'channel_id': channel_id,
+            'channel_url': format_field(channel_id, None, 'https://members.nubiles-porn.com/video/website/%s'),
+            'like_count': int_or_none(get_element_by_id('likecount', page)),
+            'average_rating': float_or_none(get_element_by_class('score', page)),
+            'age_limit': 18,
+            'categories': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_element_by_class('categories', page))))),
+            'tags': try_call(lambda: list(map(clean_html, get_elements_by_class('btn', get_elements_by_class('tags', page)[1])))),
+            'cast': get_elements_by_class('content-pane-performer', page),
+            'availability': 'needs_auth',
+            'series': channel_name,
+            'series_id': channel_id,
+            'season_number': int_or_none(url_match.group('season')),
+            'episode_number': int_or_none(url_match.group('episode')),
+        }