From 08d937c1c1002b8e67cc685c889da09d701139e4 Mon Sep 17 00:00:00 2001 From: FairTrade Date: Mon, 1 Dec 2025 16:36:24 +0100 Subject: [PATCH] Added subtitle support for VIDO --- README.md | 2 +- VIDO/__init__.py | 179 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 123 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index 5f1e474..797c382 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ 6. VIKI - CSRF Token is now scraped, would be from a api requests soon 7. VIDO - - Subtitle support + - Subtitles have a small quirk: Javanese and Sundanese are labeled in the HLS manifest but not in the DASH one - Search functionality not available yet 8. KNPY - Need to fix the search function diff --git a/VIDO/__init__.py b/VIDO/__init__.py index cb343bf..7e2b5b8 100644 --- a/VIDO/__init__.py +++ b/VIDO/__init__.py @@ -1,18 +1,19 @@ import re import uuid -import base64 +import xml.etree.ElementTree as ET +from urllib.parse import urljoin +from hashlib import md5 from typing import Optional, Union from http.cookiejar import CookieJar from langcodes import Language import click -from unshackle.core.search_result import SearchResult from unshackle.core.credential import Credential from unshackle.core.manifests import HLS, DASH from unshackle.core.service import Service from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T -from unshackle.core.tracks import Chapter, Tracks +from unshackle.core.tracks import Chapter, Tracks, Subtitle from unshackle.core.constants import AnyTrack from datetime import datetime, timezone @@ -20,22 +21,18 @@ from datetime import datetime, timezone class VIDO(Service): """ Vidio.com service, Series and Movies, login required. - Version: 2.1.0 + Version: 2.2.0 Supports URLs like: • https://www.vidio.com/premier/2978/giligilis (Series) • https://www.vidio.com/watch/7454613-marantau-short-movie (Movie) Security: HD@L3 (Widevine DRM when available) - - Note: Login is mandatory. 
Even free content requires valid session tokens - for stream access (as per API behavior). """ - # Updated regex to support both series and movies TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)" - NO_SUBTITLES = True GEOFENCE = ("ID",) + @staticmethod @click.command(name="VIDO", short_help="https://vidio.com (login required)") @click.argument("title", type=str) @@ -51,7 +48,6 @@ class VIDO(Service): raise ValueError(f"Unsupported or invalid Vidio URL: {title}") self.content_id = match.group("id") - # Determine if it's a movie or series based on URL pattern self.is_movie = "watch" in title # Static app identifiers from Android traffic @@ -77,10 +73,7 @@ class VIDO(Service): self._email = credential.username password = credential.password - # Define a unique key for this user's authentication tokens cache_key = f"auth_tokens_{self._email}" - - # Get a specific cache object for this key cache = self.cache.get(cache_key) # Check if valid tokens are already in the cache @@ -89,7 +82,6 @@ class VIDO(Service): cached_data = cache.data self._user_token = cached_data.get("user_token") self._access_token = cached_data.get("access_token") - # If tokens were successfully loaded, we're done if self._user_token and self._access_token: return @@ -120,10 +112,11 @@ class VIDO(Service): expires_at_dt = datetime.fromisoformat(expires_at_str) now_utc = datetime.now(timezone.utc) expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds())) - self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes. Caching for this duration.") + self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes.") except (KeyError, ValueError) as e: - self.log.warning(f"Could not parse token expiration time from API: {e}. Defaulting to 1 hour.") - expiration_in_seconds = 3600 # Fallback to 1 hour + self.log.warning(f"Could not parse token expiration: {e}. 
Defaulting to 1 hour.") + expiration_in_seconds = 3600 + cache.set({ "user_token": self._user_token, "access_token": self._access_token @@ -148,6 +141,66 @@ class VIDO(Service): "content-type": "application/vnd.api+json", } + def _extract_subtitles_from_mpd(self, mpd_url: str) -> list[Subtitle]: + """ + Manually parse the MPD to extract subtitle tracks. + Handles plain VTT format (for free content). + """ + subtitles = [] + + try: + r = self.session.get(mpd_url) + r.raise_for_status() + mpd_content = r.text + + # Get base URL for resolving relative paths + base_url = mpd_url.rsplit('/', 1)[0] + '/' + + # Remove namespace for easier parsing + mpd_content_clean = re.sub(r'\sxmlns="[^"]+"', '', mpd_content) + root = ET.fromstring(mpd_content_clean) + + for adaptation_set in root.findall('.//AdaptationSet'): + content_type = adaptation_set.get('contentType', '') + + if content_type != 'text': + continue + + lang = adaptation_set.get('lang', 'und') + + for rep in adaptation_set.findall('Representation'): + mime_type = rep.get('mimeType', '') + + # Handle plain VTT (free content) + if mime_type == 'text/vtt': + segment_list = rep.find('SegmentList') + if segment_list is not None: + for segment_url in segment_list.findall('SegmentURL'): + media = segment_url.get('media') + if media: + full_url = urljoin(base_url, media) + + # Determine if auto-generated + is_auto = '-auto' in lang + clean_lang = lang.replace('-auto', '') + + subtitle = Subtitle( + id_=md5(full_url.encode()).hexdigest()[0:16], + url=full_url, + codec=Subtitle.Codec.WebVTT, + language=Language.get(clean_lang), + forced=False, + sdh=False, + ) + + subtitles.append(subtitle) + self.log.debug(f"Found VTT subtitle: {lang} -> {full_url}") + + except Exception as e: + self.log.warning(f"Failed to extract subtitles from MPD: {e}") + + return subtitles + def get_titles(self) -> Titles_T: headers = self._headers() @@ -173,13 +226,11 @@ class VIDO(Service): ) ]) else: - # Fetch the main content profile r = 
self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers) r.raise_for_status() root = r.json()["data"] series_title = root["attributes"]["title"] - # Fetch all playlists (seasons + extras) r_playlists = self.session.get( f"https://api.vidio.com/content_profiles/{self.content_id}/playlists", headers=headers @@ -194,18 +245,15 @@ class VIDO(Service): if group.get("type") == "season": season_playlist_ids.update(group.get("playlist_ids", [])) - # If no metadata, fall back to name-based detection season_playlists = [] for pl in playlists_data["data"]: playlist_id = int(pl["id"]) name = pl["attributes"]["name"].lower() - # Use metadata if available, otherwise use name matching if season_playlist_ids: if playlist_id in season_playlist_ids: season_playlists.append(pl) else: - # Fallback: match "season" but exclude "trailer" and "extra" if ("season" in name or name == "episode" or name == "episodes") and \ "trailer" not in name and "extra" not in name: season_playlists.append(pl) @@ -213,14 +261,11 @@ class VIDO(Service): if not season_playlists: raise ValueError("No season playlists found for this series.") - # Sort seasons and extract season numbers def extract_season_number(pl): name = pl["attributes"]["name"] - # Try to extract number after "Season" match = re.search(r"season\s*(\d+)", name, re.IGNORECASE) if match: return int(match.group(1)) - # If it's just "Season" or "Episodes", treat as Season 1 elif name.lower() in ["season", "episodes", "episode"]: return 1 else: @@ -234,7 +279,6 @@ class VIDO(Service): playlist_id = playlist["id"] season_number = extract_season_number(playlist) - # If season_number is 0, default to 1 if season_number == 0: season_number = 1 @@ -257,7 +301,6 @@ class VIDO(Service): for raw_ep in page_data["data"]: attrs = raw_ep["attributes"] - # Count episodes within the same season ep_number = len([e for e in all_episodes if e.season == season_number]) + 1 all_episodes.append( Episode( @@ -292,8 +335,8 @@ 
class VIDO(Service): "x-device-os": "Android 15 (API 35)", "x-device-android-mpc": "0", "x-device-cpu-arch": "arm64-v8a", - "x-device-platform": "android", - "x-app-version": "7.14.6-e4d1de87f2-3191683", + "x-device-platform": "android", + "x-app-version": "7.14.6-e4d1de87f2-3191683", }) video_id = str(title.id) @@ -303,45 +346,67 @@ class VIDO(Service): r.raise_for_status() stream = r.json() - # Safety check: ensure stream is a valid dict if not isinstance(stream, dict): - raise ValueError("Vidio returned invalid stream data (not a JSON object). " - "Content may be geo-blocked, subscription-restricted, or session expired.") - + raise ValueError("Vidio returned invalid stream data.") + # Extract DRM info custom_data = stream.get("custom_data") or {} license_servers = stream.get("license_servers") or {} widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None - dash_url = stream.get("stream_dash_url") - - has_valid_drm = bool(widevine_data and license_url and dash_url and isinstance(widevine_data, str)) - - if has_valid_drm: + + # Get stream URLs + dash_url = stream.get("stream_dash_url") or stream.get("stream_token_dash_url") + hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url") + + has_drm = widevine_data and license_url and dash_url and isinstance(widevine_data, str) + + if has_drm: + # DRM content: use DASH self.log.info("Widevine DRM detected, using DASH") self.custom_data = widevine_data self.license_url = license_url tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language) + + elif hls_url: + # Non-DRM: use HLS for video/audio + self.log.info("No DRM detected, using HLS for video/audio") + self.custom_data = None + self.license_url = None + tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language) + + # Clear HLS subtitles (they're segmented and 
incompatible) + if tracks.subtitles: + self.log.debug("Clearing HLS subtitles (incompatible format)") + tracks.subtitles.clear() + + # Get subtitles from DASH manifest (plain VTT) + if dash_url: + self.log.debug("Extracting subtitles from DASH manifest") + manual_subs = self._extract_subtitles_from_mpd(dash_url) + if manual_subs: + for sub in manual_subs: + tracks.add(sub) + self.log.info(f"Added {len(manual_subs)} subtitle tracks from DASH") + + elif dash_url: + # Fallback to DASH + self.log.warning("No HLS available, using DASH (VP9 codec)") + self.custom_data = None + self.license_url = None + tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language) + + # Try manual subtitle extraction for non-DRM DASH + if not tracks.subtitles: + manual_subs = self._extract_subtitles_from_mpd(dash_url) + if manual_subs: + for sub in manual_subs: + tracks.add(sub) else: - # Prefer HLS for non-DRM (more reliable metadata, avoids frame_rate=None) - self.log.info("No valid Widevine DRM, using HLS") - hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url") - if hls_url: - self.log.debug(f"HLS URL: {hls_url}") - tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language) - else: - # Last resort: non-DRM DASH (e.g., VP9), but warn user - dash_url = stream.get("stream_token_dash_url") - if dash_url: - self.log.warning("HLS unavailable, falling back to non-DRM DASH (may lack frame rate metadata)") - tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language) - else: - raise ValueError( - "No playable stream (HLS or DASH) available. " - "This episode may be restricted, unavailable, or require a higher subscription tier." 
- ) + raise ValueError("No playable stream (DASH or HLS) available.") - self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks") + self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks, {len(tracks.subtitles)} subtitle tracks") + return tracks def get_chapters(self, title: Title_T) -> list[Chapter]: @@ -376,4 +441,4 @@ class VIDO(Service): error_summary = response.text[:200] if response.text else "No response body" raise Exception(f"License request failed ({response.status_code}): {error_summary}") - return response.content \ No newline at end of file + return response.content