Added subtitle support for VIDO

2025-12-01 16:36:24 +01:00 · 2025-12-01 16:36:24 +01:00 · 08d937c1c1
commit 08d937c1c1
parent 7385ca91a0
2 changed files with 123 additions and 58 deletions
--- a/README.md
+++ b/README.md
@ -24,7 +24,7 @@
    6. VIKI
        - CSRF Token is now scraped, would be from a api requests soon
    7. VIDO
-        - Subtitle support
+        - Subtitles have a small quirk: the Javanese and Sundanese tracks are labeled with their language in the HLS manifest but not in the DASH one
        - Search functionality not available yet
    8. KNPY
        - Need to fix the search function
--- a/VIDO/init.py
+++ b/VIDO/init.py
@ -1,18 +1,19 @@
 import re
 import uuid
-import base64
+import xml.etree.ElementTree as ET
 from urllib.parse import urljoin
 from hashlib import md5
 from typing import Optional, Union
 from http.cookiejar import CookieJar
 from langcodes import Language
 import click
 from unshackle.core.search_result import SearchResult
 from unshackle.core.credential import Credential
 from unshackle.core.manifests import HLS, DASH
 from unshackle.core.service import Service
 from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
-from unshackle.core.tracks import Chapter, Tracks
+from unshackle.core.tracks import Chapter, Tracks, Subtitle
 from unshackle.core.constants import AnyTrack
 from datetime import datetime, timezone
@ -20,22 +21,18 @@ from datetime import datetime, timezone
 class VIDO(Service):
    """
    Vidio.com service, Series and Movies, login required.
-    Version: 2.1.0
+    Version: 2.2.0
    Supports URLs like:
      • https://www.vidio.com/premier/2978/giligilis (Series)
      • https://www.vidio.com/watch/7454613-marantau-short-movie (Movie)
    Security: HD@L3 (Widevine DRM when available)
    Note: Login is mandatory. Even free content requires valid session tokens
          for stream access (as per API behavior).
    """
    # Updated regex to support both series and movies
    TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)"
    NO_SUBTITLES = True
    GEOFENCE = ("ID",)
    @staticmethod
    @click.command(name="VIDO", short_help="https://vidio.com (login required)")
    @click.argument("title", type=str)
@ -51,7 +48,6 @@ class VIDO(Service):
            raise ValueError(f"Unsupported or invalid Vidio URL: {title}")
        self.content_id = match.group("id")
        # Determine if it's a movie or series based on URL pattern
        self.is_movie = "watch" in title
        # Static app identifiers from Android traffic
@ -77,10 +73,7 @@ class VIDO(Service):
        self._email = credential.username
        password = credential.password
        # Define a unique key for this user's authentication tokens
        cache_key = f"auth_tokens_{self._email}"
        # Get a specific cache object for this key
        cache = self.cache.get(cache_key)
        # Check if valid tokens are already in the cache
@ -89,7 +82,6 @@ class VIDO(Service):
            cached_data = cache.data
            self._user_token = cached_data.get("user_token")
            self._access_token = cached_data.get("access_token")
            # If tokens were successfully loaded, we're done
            if self._user_token and self._access_token:
                return
@ -120,10 +112,11 @@ class VIDO(Service):
            expires_at_dt = datetime.fromisoformat(expires_at_str)
            now_utc = datetime.now(timezone.utc)
            expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds()))
-            self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes. Caching for this duration.")
+            self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes.")
        except (KeyError, ValueError) as e:
-            self.log.warning(f"Could not parse token expiration time from API: {e}. Defaulting to 1 hour.")
+            self.log.warning(f"Could not parse token expiration: {e}. Defaulting to 1 hour.")
-            expiration_in_seconds = 3600 # Fallback to 1 hour
+            expiration_in_seconds = 3600
        cache.set({
            "user_token": self._user_token,
            "access_token": self._access_token
@ -148,6 +141,66 @@ class VIDO(Service):
            "content-type": "application/vnd.api+json",
        }
    def _extract_subtitles_from_mpd(self, mpd_url: str) -> list[Subtitle]:
        """
        Manually parse a DASH manifest (MPD) and collect its plain-VTT subtitles.

        Only AdaptationSets with contentType="text" are inspected, and within
        them only Representations with mimeType="text/vtt" that list their
        media in a SegmentList. One Subtitle is created per SegmentURL that
        carries a "media" attribute.

        Args:
            mpd_url: URL of the MPD. Relative segment paths are resolved
                against it.

        Returns:
            The subtitle tracks found. Extraction is best-effort: if the
            request, the XML parsing, or the track construction fails, a
            warning is logged and whatever was collected up to that point
            (possibly an empty list) is returned instead of raising.
        """
        subtitles: list[Subtitle] = []
        try:
            r = self.session.get(mpd_url)
            r.raise_for_status()

            # Strip the default namespace so elements can be looked up by bare tag name.
            root = ET.fromstring(re.sub(r'\sxmlns="[^"]+"', '', r.text))

            for adaptation_set in root.findall('.//AdaptationSet'):
                if adaptation_set.get('contentType', '') != 'text':
                    continue

                lang = adaptation_set.get('lang', 'und')
                # The manifest may suffix the tag with "-auto"; drop the suffix
                # so the remainder can be parsed as a language tag.
                clean_lang = lang.replace('-auto', '')

                for rep in adaptation_set.findall('Representation'):
                    # Only plain VTT is handled here.
                    if rep.get('mimeType', '') != 'text/vtt':
                        continue

                    segment_list = rep.find('SegmentList')
                    if segment_list is None:
                        continue

                    for segment_url in segment_list.findall('SegmentURL'):
                        media = segment_url.get('media')
                        if not media:
                            continue

                        # Resolve against the manifest URL itself: urljoin drops
                        # the manifest's file name and query string, which stays
                        # correct even when the query (e.g. a signed token)
                        # contains '/' characters.
                        full_url = urljoin(mpd_url, media)

                        subtitles.append(
                            Subtitle(
                                id_=md5(full_url.encode()).hexdigest()[0:16],
                                url=full_url,
                                codec=Subtitle.Codec.WebVTT,
                                language=Language.get(clean_lang),
                                forced=False,
                                sdh=False,
                            )
                        )
                        self.log.debug(f"Found VTT subtitle: {lang} -> {full_url}")

        except Exception as e:
            # Deliberately broad: subtitles are optional, so a failure here must
            # not abort track retrieval for the title.
            self.log.warning(f"Failed to extract subtitles from MPD: {e}")

        return subtitles
    def get_titles(self) -> Titles_T:
        headers = self._headers()
@ -173,13 +226,11 @@ class VIDO(Service):
                )
            ])
        else:
            # Fetch the main content profile
            r = self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers)
            r.raise_for_status()
            root = r.json()["data"]
            series_title = root["attributes"]["title"]
            # Fetch all playlists (seasons + extras)
            r_playlists = self.session.get(
                f"https://api.vidio.com/content_profiles/{self.content_id}/playlists",
                headers=headers
@ -194,18 +245,15 @@ class VIDO(Service):
                    if group.get("type") == "season":
                        season_playlist_ids.update(group.get("playlist_ids", []))
            # If no metadata, fall back to name-based detection
            season_playlists = []
            for pl in playlists_data["data"]:
                playlist_id = int(pl["id"])
                name = pl["attributes"]["name"].lower()
                # Use metadata if available, otherwise use name matching
                if season_playlist_ids:
                    if playlist_id in season_playlist_ids:
                        season_playlists.append(pl)
                else:
                    # Fallback: match "season" but exclude "trailer" and "extra"
                    if ("season" in name or name == "episode" or name == "episodes") and \
                       "trailer" not in name and "extra" not in name:
                        season_playlists.append(pl)
@ -213,14 +261,11 @@ class VIDO(Service):
            if not season_playlists:
                raise ValueError("No season playlists found for this series.")
            # Sort seasons and extract season numbers
            def extract_season_number(pl):
                name = pl["attributes"]["name"]
                # Try to extract number after "Season"
                match = re.search(r"season\s*(\d+)", name, re.IGNORECASE)
                if match:
                    return int(match.group(1))
                # If it's just "Season" or "Episodes", treat as Season 1
                elif name.lower() in ["season", "episodes", "episode"]:
                    return 1
                else:
@ -234,7 +279,6 @@ class VIDO(Service):
                playlist_id = playlist["id"]
                season_number = extract_season_number(playlist)
                # If season_number is 0, default to 1
                if season_number == 0:
                    season_number = 1
@ -257,7 +301,6 @@ class VIDO(Service):
                    for raw_ep in page_data["data"]:
                        attrs = raw_ep["attributes"]
                        # Count episodes within the same season
                        ep_number = len([e for e in all_episodes if e.season == season_number]) + 1
                        all_episodes.append(
                            Episode(
@ -292,8 +335,8 @@ class VIDO(Service):
            "x-device-os": "Android 15 (API 35)",
            "x-device-android-mpc": "0",
            "x-device-cpu-arch": "arm64-v8a",
-            "x-device-platform": "android",                   
+            "x-device-platform": "android",
-            "x-app-version": "7.14.6-e4d1de87f2-3191683",     
+            "x-app-version": "7.14.6-e4d1de87f2-3191683",
        })
        video_id = str(title.id)
@ -303,45 +346,67 @@ class VIDO(Service):
        r.raise_for_status()
        stream = r.json()
        # Safety check: ensure stream is a valid dict
        if not isinstance(stream, dict):
-            raise ValueError("Vidio returned invalid stream data (not a JSON object). "
+            raise ValueError("Vidio returned invalid stream data.")
                             "Content may be geo-blocked, subscription-restricted, or session expired.")
        # Extract DRM info
        custom_data = stream.get("custom_data") or {}
        license_servers = stream.get("license_servers") or {}
        widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None
        license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None
-        dash_url = stream.get("stream_dash_url")
+        
-
+        # Get stream URLs
-        has_valid_drm = bool(widevine_data and license_url and dash_url and isinstance(widevine_data, str))
+        dash_url = stream.get("stream_dash_url") or stream.get("stream_token_dash_url")
-
+        hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
-        if has_valid_drm:
+        
        has_drm = widevine_data and license_url and dash_url and isinstance(widevine_data, str)
        if has_drm:
            # DRM content: use DASH
            self.log.info("Widevine DRM detected, using DASH")
            self.custom_data = widevine_data
            self.license_url = license_url
            tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
        elif hls_url:
            # Non-DRM: use HLS for video/audio
            self.log.info("No DRM detected, using HLS for video/audio")
            self.custom_data = None
            self.license_url = None
            tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
            # Clear HLS subtitles (they're segmented and incompatible)
            if tracks.subtitles:
                self.log.debug("Clearing HLS subtitles (incompatible format)")
                tracks.subtitles.clear()
            # Get subtitles from DASH manifest (plain VTT)
            if dash_url:
                self.log.debug("Extracting subtitles from DASH manifest")
                manual_subs = self._extract_subtitles_from_mpd(dash_url)
                if manual_subs:
                    for sub in manual_subs:
                        tracks.add(sub)
                    self.log.info(f"Added {len(manual_subs)} subtitle tracks from DASH")
        elif dash_url:
            # Fallback to DASH
            self.log.warning("No HLS available, using DASH (VP9 codec)")
            self.custom_data = None
            self.license_url = None
            tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
            # Try manual subtitle extraction for non-DRM DASH
            if not tracks.subtitles:
                manual_subs = self._extract_subtitles_from_mpd(dash_url)
                if manual_subs:
                    for sub in manual_subs:
                        tracks.add(sub)
        else:
-            # Prefer HLS for non-DRM (more reliable metadata, avoids frame_rate=None)
+            raise ValueError("No playable stream (DASH or HLS) available.")
            self.log.info("No valid Widevine DRM, using HLS")
            hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
            if hls_url:
                self.log.debug(f"HLS URL: {hls_url}")
                tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
            else:
                # Last resort: non-DRM DASH (e.g., VP9), but warn user
                dash_url = stream.get("stream_token_dash_url")
                if dash_url:
                    self.log.warning("HLS unavailable, falling back to non-DRM DASH (may lack frame rate metadata)")
                    tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
                else:
                    raise ValueError(
                        "No playable stream (HLS or DASH) available. "
                        "This episode may be restricted, unavailable, or require a higher subscription tier."
                    )
-        self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks")
+        self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks, {len(tracks.subtitles)} subtitle tracks")
        return tracks
    def get_chapters(self, title: Title_T) -> list[Chapter]:
@ -376,4 +441,4 @@ class VIDO(Service):
            error_summary = response.text[:200] if response.text else "No response body"
            raise Exception(f"License request failed ({response.status_code}): {error_summary}")
-        return response.content
+        return response.content