Added subtitle support for VIDO

2025-12-01 16:36:24 +01:00 · 2025-12-01 16:36:24 +01:00 · 08d937c1c1
commit 08d937c1c1
parent 7385ca91a0
2 changed files with 123 additions and 58 deletions
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@
    6. VIKI
        - CSRF Token is now scraped, would be from a api requests soon
    7. VIDO
-        - Subtitle support
+        - Subtitles have a small quirk: the Javanese and Sundanese languages are labeled in the HLS manifest but not in the DASH one
        - Search functionality not available yet
    8. KNPY
        - Need to fix the search function
--- a/VIDO/init.py
+++ b/VIDO/init.py
@@ -1,18 +1,19 @@
 import re
 import uuid
-import base64
+import xml.etree.ElementTree as ET
+from urllib.parse import urljoin
+from hashlib import md5
 from typing import Optional, Union
 from http.cookiejar import CookieJar
 from langcodes import Language

 import click

-from unshackle.core.search_result import SearchResult
 from unshackle.core.credential import Credential
 from unshackle.core.manifests import HLS, DASH
 from unshackle.core.service import Service
 from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
-from unshackle.core.tracks import Chapter, Tracks
+from unshackle.core.tracks import Chapter, Tracks, Subtitle
 from unshackle.core.constants import AnyTrack
 from datetime import datetime, timezone

@@ -20,22 +21,18 @@ from datetime import datetime, timezone
 class VIDO(Service):
    """
    Vidio.com service, Series and Movies, login required.
-    Version: 2.1.0
+    Version: 2.2.0

    Supports URLs like:
      • https://www.vidio.com/premier/2978/giligilis (Series)
      • https://www.vidio.com/watch/7454613-marantau-short-movie (Movie)

    Security: HD@L3 (Widevine DRM when available)
-
-    Note: Login is mandatory. Even free content requires valid session tokens
-          for stream access (as per API behavior).
    """

-    # Updated regex to support both series and movies
    TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)"
-    NO_SUBTITLES = True
    GEOFENCE = ("ID",)
+
    @staticmethod
    @click.command(name="VIDO", short_help="https://vidio.com (login required)")
    @click.argument("title", type=str)
@@ -51,7 +48,6 @@ class VIDO(Service):
            raise ValueError(f"Unsupported or invalid Vidio URL: {title}")
        self.content_id = match.group("id")
        
-        # Determine if it's a movie or series based on URL pattern
        self.is_movie = "watch" in title

        # Static app identifiers from Android traffic
@@ -77,10 +73,7 @@ class VIDO(Service):
        self._email = credential.username
        password = credential.password

-        # Define a unique key for this user's authentication tokens
        cache_key = f"auth_tokens_{self._email}"
-        
-        # Get a specific cache object for this key
        cache = self.cache.get(cache_key)

        # Check if valid tokens are already in the cache
@@ -89,7 +82,6 @@ class VIDO(Service):
            cached_data = cache.data
            self._user_token = cached_data.get("user_token")
            self._access_token = cached_data.get("access_token")
-            # If tokens were successfully loaded, we're done
            if self._user_token and self._access_token:
                return

@@ -120,10 +112,11 @@ class VIDO(Service):
            expires_at_dt = datetime.fromisoformat(expires_at_str)
            now_utc = datetime.now(timezone.utc)
            expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds()))
-            self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes. Caching for this duration.")
+            self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes.")
        except (KeyError, ValueError) as e:
-            self.log.warning(f"Could not parse token expiration time from API: {e}. Defaulting to 1 hour.")
-            expiration_in_seconds = 3600 # Fallback to 1 hour
+            self.log.warning(f"Could not parse token expiration: {e}. Defaulting to 1 hour.")
+            expiration_in_seconds = 3600
+
        cache.set({
            "user_token": self._user_token,
            "access_token": self._access_token
@@ -148,6 +141,66 @@ class VIDO(Service):
            "content-type": "application/vnd.api+json",
        }

+    def _extract_subtitles_from_mpd(self, mpd_url: str) -> list[Subtitle]:
+        """
+        Download the MPD at mpd_url and manually parse it to extract subtitle tracks.
+        Handles plain VTT format (for free content); failures are logged and yield the tracks found so far.
+        """
+        subtitles = []
+        
+        try:
+            r = self.session.get(mpd_url)
+            r.raise_for_status()
+            mpd_content = r.text
+            
+            # Directory part of the manifest URL; relative SegmentURL media paths are joined onto it
+            base_url = mpd_url.rsplit('/', 1)[0] + '/'
+            
+            # Strip default xmlns declarations so findall() can match un-namespaced tag names
+            mpd_content_clean = re.sub(r'\sxmlns="[^"]+"', '', mpd_content)
+            root = ET.fromstring(mpd_content_clean)
+            
+            for adaptation_set in root.findall('.//AdaptationSet'):
+                content_type = adaptation_set.get('contentType', '')
+                # Only sets explicitly marked contentType="text" are treated as subtitles
+                if content_type != 'text':
+                    continue
+                
+                lang = adaptation_set.get('lang', 'und')  # 'und' when the manifest omits lang
+                
+                for rep in adaptation_set.findall('Representation'):
+                    mime_type = rep.get('mimeType', '')
+                    
+                    # Handle plain VTT (free content); other mime types are ignored
+                    if mime_type == 'text/vtt':
+                        segment_list = rep.find('SegmentList')
+                        if segment_list is not None:
+                            for segment_url in segment_list.findall('SegmentURL'):
+                                media = segment_url.get('media')
+                                if media:
+                                    full_url = urljoin(base_url, media)
+                                    
+                                    # NOTE(review): is_auto is assigned but never read below; only the '-auto' marker is stripped
+                                    is_auto = '-auto' in lang
+                                    clean_lang = lang.replace('-auto', '')
+                                    
+                                    subtitle = Subtitle(
+                                        id_=md5(full_url.encode()).hexdigest()[0:16],  # first 16 hex chars of the URL's MD5
+                                        url=full_url,
+                                        codec=Subtitle.Codec.WebVTT,
+                                        language=Language.get(clean_lang),
+                                        forced=False,
+                                        sdh=False,
+                                    )
+                                    
+                                    subtitles.append(subtitle)
+                                    self.log.debug(f"Found VTT subtitle: {lang} -> {full_url}")
+            
+        except Exception as e:  # best-effort: any fetch/parse error is downgraded to a warning
+            self.log.warning(f"Failed to extract subtitles from MPD: {e}")
+        
+        return subtitles
+
    def get_titles(self) -> Titles_T:
        headers = self._headers()

@@ -173,13 +226,11 @@ class VIDO(Service):
                )
            ])
        else:
-            # Fetch the main content profile
            r = self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers)
            r.raise_for_status()
            root = r.json()["data"]
            series_title = root["attributes"]["title"]

-            # Fetch all playlists (seasons + extras)
            r_playlists = self.session.get(
                f"https://api.vidio.com/content_profiles/{self.content_id}/playlists",
                headers=headers
@@ -194,18 +245,15 @@ class VIDO(Service):
                    if group.get("type") == "season":
                        season_playlist_ids.update(group.get("playlist_ids", []))

-            # If no metadata, fall back to name-based detection
            season_playlists = []
            for pl in playlists_data["data"]:
                playlist_id = int(pl["id"])
                name = pl["attributes"]["name"].lower()
                
-                # Use metadata if available, otherwise use name matching
                if season_playlist_ids:
                    if playlist_id in season_playlist_ids:
                        season_playlists.append(pl)
                else:
-                    # Fallback: match "season" but exclude "trailer" and "extra"
                    if ("season" in name or name == "episode" or name == "episodes") and \
                       "trailer" not in name and "extra" not in name:
                        season_playlists.append(pl)
@@ -213,14 +261,11 @@ class VIDO(Service):
            if not season_playlists:
                raise ValueError("No season playlists found for this series.")

-            # Sort seasons and extract season numbers
            def extract_season_number(pl):
                name = pl["attributes"]["name"]
-                # Try to extract number after "Season"
                match = re.search(r"season\s*(\d+)", name, re.IGNORECASE)
                if match:
                    return int(match.group(1))
-                # If it's just "Season" or "Episodes", treat as Season 1
                elif name.lower() in ["season", "episodes", "episode"]:
                    return 1
                else:
@@ -234,7 +279,6 @@ class VIDO(Service):
                playlist_id = playlist["id"]
                season_number = extract_season_number(playlist)
                
-                # If season_number is 0, default to 1
                if season_number == 0:
                    season_number = 1
                
@@ -257,7 +301,6 @@ class VIDO(Service):

                    for raw_ep in page_data["data"]:
                        attrs = raw_ep["attributes"]
-                        # Count episodes within the same season
                        ep_number = len([e for e in all_episodes if e.season == season_number]) + 1
                        all_episodes.append(
                            Episode(
@@ -292,8 +335,8 @@ class VIDO(Service):
            "x-device-os": "Android 15 (API 35)",
            "x-device-android-mpc": "0",
            "x-device-cpu-arch": "arm64-v8a",
-            "x-device-platform": "android",                   
-            "x-app-version": "7.14.6-e4d1de87f2-3191683",     
+            "x-device-platform": "android",
+            "x-app-version": "7.14.6-e4d1de87f2-3191683",
        })

        video_id = str(title.id)
@@ -303,45 +346,67 @@ class VIDO(Service):
        r.raise_for_status()
        stream = r.json()

-        # Safety check: ensure stream is a valid dict
        if not isinstance(stream, dict):
-            raise ValueError("Vidio returned invalid stream data (not a JSON object). "
-                             "Content may be geo-blocked, subscription-restricted, or session expired.")
-
+            raise ValueError("Vidio returned invalid stream data.")

+        # Extract DRM info
        custom_data = stream.get("custom_data") or {}
        license_servers = stream.get("license_servers") or {}
        widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None
        license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None
-        dash_url = stream.get("stream_dash_url")
-
-        has_valid_drm = bool(widevine_data and license_url and dash_url and isinstance(widevine_data, str))
-
-        if has_valid_drm:
+        
+        # Get stream URLs
+        dash_url = stream.get("stream_dash_url") or stream.get("stream_token_dash_url")
+        hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
+        
+        has_drm = widevine_data and license_url and dash_url and isinstance(widevine_data, str)
+        
+        if has_drm:
+            # DRM content: use DASH
            self.log.info("Widevine DRM detected, using DASH")
            self.custom_data = widevine_data
            self.license_url = license_url
            tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
+            
+        elif hls_url:
+            # Non-DRM: use HLS for video/audio
+            self.log.info("No DRM detected, using HLS for video/audio")
+            self.custom_data = None
+            self.license_url = None
+            tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
+            
+            # Clear HLS subtitles (they're segmented and incompatible)
+            if tracks.subtitles:
+                self.log.debug("Clearing HLS subtitles (incompatible format)")
+                tracks.subtitles.clear()
+            
+            # Get subtitles from DASH manifest (plain VTT)
+            if dash_url:
+                self.log.debug("Extracting subtitles from DASH manifest")
+                manual_subs = self._extract_subtitles_from_mpd(dash_url)
+                if manual_subs:
+                    for sub in manual_subs:
+                        tracks.add(sub)
+                    self.log.info(f"Added {len(manual_subs)} subtitle tracks from DASH")
+                    
+        elif dash_url:
+            # Fallback to DASH
+            self.log.warning("No HLS available, using DASH (VP9 codec)")
+            self.custom_data = None
+            self.license_url = None
+            tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
+            
+            # Try manual subtitle extraction for non-DRM DASH
+            if not tracks.subtitles:
+                manual_subs = self._extract_subtitles_from_mpd(dash_url)
+                if manual_subs:
+                    for sub in manual_subs:
+                        tracks.add(sub)
        else:
-            # Prefer HLS for non-DRM (more reliable metadata, avoids frame_rate=None)
-            self.log.info("No valid Widevine DRM, using HLS")
-            hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
-            if hls_url:
-                self.log.debug(f"HLS URL: {hls_url}")
-                tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
-            else:
-                # Last resort: non-DRM DASH (e.g., VP9), but warn user
-                dash_url = stream.get("stream_token_dash_url")
-                if dash_url:
-                    self.log.warning("HLS unavailable, falling back to non-DRM DASH (may lack frame rate metadata)")
-                    tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
-                else:
-                    raise ValueError(
-                        "No playable stream (HLS or DASH) available. "
-                        "This episode may be restricted, unavailable, or require a higher subscription tier."
-                    )
+            raise ValueError("No playable stream (DASH or HLS) available.")

-        self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks")
+        self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks, {len(tracks.subtitles)} subtitle tracks")
+        
        return tracks

    def get_chapters(self, title: Title_T) -> list[Chapter]:
@@ -376,4 +441,4 @@ class VIDO(Service):
            error_summary = response.text[:200] if response.text else "No response body"
            raise Exception(f"License request failed ({response.status_code}): {error_summary}")

-        return response.content
+        return response.content