From 08d937c1c1002b8e67cc685c889da09d701139e4 Mon Sep 17 00:00:00 2001
From: FairTrade <zerogirlfriend@waifu.club>
Date: Mon, 1 Dec 2025 16:36:24 +0100
Subject: [PATCH] Added subtitle support for VIDO

---
 README.md        |   2 +-
 VIDO/__init__.py | 179 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 123 insertions(+), 58 deletions(-)
diff --git a/README.md b/README.md
index 5f1e474..797c382 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@
     6. VIKI
         - CSRF Token is now scraped, would be from a api requests soon
     7. VIDO
-        - Subtitle support
+        - Subtitles have a small quirk: the Javanese and Sundanese languages are labeled in the HLS manifest but not in the DASH one
         - Search functionality not available yet
     8. KNPY
         - Need to fix the search function
diff --git a/VIDO/__init__.py b/VIDO/__init__.py
index cb343bf..7e2b5b8 100644
--- a/VIDO/__init__.py
+++ b/VIDO/__init__.py
@@ -1,18 +1,19 @@
 import re
 import uuid
-import base64
+import xml.etree.ElementTree as ET
+from urllib.parse import urljoin
+from hashlib import md5
 from typing import Optional, Union
 from http.cookiejar import CookieJar
 from langcodes import Language
 
 import click
 
-from unshackle.core.search_result import SearchResult
 from unshackle.core.credential import Credential
 from unshackle.core.manifests import HLS, DASH
 from unshackle.core.service import Service
 from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
-from unshackle.core.tracks import Chapter, Tracks
+from unshackle.core.tracks import Chapter, Tracks, Subtitle
 from unshackle.core.constants import AnyTrack
 from datetime import datetime, timezone
 
@@ -20,22 +21,18 @@ from datetime import datetime, timezone
 class VIDO(Service):
     """
     Vidio.com service, Series and Movies, login required.
-    Version: 2.1.0
+    Version: 2.2.0
 
     Supports URLs like:
       • https://www.vidio.com/premier/2978/giligilis (Series)
       • https://www.vidio.com/watch/7454613-marantau-short-movie (Movie)
 
     Security: HD@L3 (Widevine DRM when available)
-
-    Note: Login is mandatory. Even free content requires valid session tokens
-          for stream access (as per API behavior).
     """
 
-    # Updated regex to support both series and movies
     TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)"
-    NO_SUBTITLES = True
     GEOFENCE = ("ID",)
+
     @staticmethod
     @click.command(name="VIDO", short_help="https://vidio.com (login required)")
     @click.argument("title", type=str)
@@ -51,7 +48,6 @@ class VIDO(Service):
             raise ValueError(f"Unsupported or invalid Vidio URL: {title}")
         self.content_id = match.group("id")
         
-        # Determine if it's a movie or series based on URL pattern
         self.is_movie = "watch" in title
 
         # Static app identifiers from Android traffic
@@ -77,10 +73,7 @@ class VIDO(Service):
         self._email = credential.username
         password = credential.password
 
-        # Define a unique key for this user's authentication tokens
         cache_key = f"auth_tokens_{self._email}"
-        
-        # Get a specific cache object for this key
         cache = self.cache.get(cache_key)
 
         # Check if valid tokens are already in the cache
@@ -89,7 +82,6 @@ class VIDO(Service):
             cached_data = cache.data
             self._user_token = cached_data.get("user_token")
             self._access_token = cached_data.get("access_token")
-            # If tokens were successfully loaded, we're done
             if self._user_token and self._access_token:
                 return
 
@@ -120,10 +112,11 @@ class VIDO(Service):
             expires_at_dt = datetime.fromisoformat(expires_at_str)
             now_utc = datetime.now(timezone.utc)
             expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds()))
-            self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes. Caching for this duration.")
+            self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes.")
         except (KeyError, ValueError) as e:
-            self.log.warning(f"Could not parse token expiration time from API: {e}. Defaulting to 1 hour.")
-            expiration_in_seconds = 3600 # Fallback to 1 hour
+            self.log.warning(f"Could not parse token expiration: {e}. Defaulting to 1 hour.")
+            expiration_in_seconds = 3600
+
         cache.set({
             "user_token": self._user_token,
             "access_token": self._access_token
@@ -148,6 +141,66 @@ class VIDO(Service):
             "content-type": "application/vnd.api+json",
         }
 
+    def _extract_subtitles_from_mpd(self, mpd_url: str) -> list[Subtitle]:
+        """
+        Manually parse the MPD to extract subtitle tracks.
+        Handles plain VTT format (for free content).
+
+        Only `text` AdaptationSets with a `text/vtt` Representation that has a
+        SegmentList are used; one Subtitle is built per SegmentURL. Best-effort:
+        on failure a warning is logged and the tracks found so far are returned.
+
+        :param mpd_url: URL of the DASH manifest to download and scan.
+        :returns: WebVTT Subtitle tracks found in the manifest (may be empty).
+        """
+        subtitles = []
+
+        try:
+            r = self.session.get(mpd_url)
+            r.raise_for_status()
+
+            # Drop the default namespace so elements match by their bare tag name
+            root = ET.fromstring(re.sub(r'\sxmlns="[^"]+"', '', r.text))
+
+            for adaptation_set in root.findall('.//AdaptationSet'):
+                if adaptation_set.get('contentType', '') != 'text':
+                    continue
+
+                lang = adaptation_set.get('lang', 'und')
+                # A "-auto" marker in the lang value denotes an auto-generated
+                # track; strip it so only the language tag itself is parsed
+                clean_lang = lang.replace('-auto', '')
+
+                for rep in adaptation_set.findall('Representation'):
+                    # Handle plain VTT (free content)
+                    if rep.get('mimeType', '') != 'text/vtt':
+                        continue
+                    segment_list = rep.find('SegmentList')
+                    if segment_list is None:
+                        continue
+
+                    for segment_url in segment_list.findall('SegmentURL'):
+                        media = segment_url.get('media')
+                        if not media:
+                            continue
+
+                        # Relative paths resolve against the manifest URL
+                        full_url = urljoin(mpd_url, media)
+                        subtitles.append(Subtitle(
+                            id_=md5(full_url.encode()).hexdigest()[0:16],
+                            url=full_url,
+                            codec=Subtitle.Codec.WebVTT,
+                            language=Language.get(clean_lang),
+                            forced=False,
+                            sdh=False,
+                        ))
+                        self.log.debug(f"Found VTT subtitle: {lang} -> {full_url}")
+
+        except Exception as e:
+            self.log.warning(f"Failed to extract subtitles from MPD: {e}")
+
+        return subtitles
+
     def get_titles(self) -> Titles_T:
         headers = self._headers()
 
@@ -173,13 +226,11 @@ class VIDO(Service):
                 )
             ])
         else:
-            # Fetch the main content profile
             r = self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers)
             r.raise_for_status()
             root = r.json()["data"]
             series_title = root["attributes"]["title"]
 
-            # Fetch all playlists (seasons + extras)
             r_playlists = self.session.get(
                 f"https://api.vidio.com/content_profiles/{self.content_id}/playlists",
                 headers=headers
@@ -194,18 +245,15 @@ class VIDO(Service):
                     if group.get("type") == "season":
                         season_playlist_ids.update(group.get("playlist_ids", []))
 
-            # If no metadata, fall back to name-based detection
             season_playlists = []
             for pl in playlists_data["data"]:
                 playlist_id = int(pl["id"])
                 name = pl["attributes"]["name"].lower()
                 
-                # Use metadata if available, otherwise use name matching
                 if season_playlist_ids:
                     if playlist_id in season_playlist_ids:
                         season_playlists.append(pl)
                 else:
-                    # Fallback: match "season" but exclude "trailer" and "extra"
                     if ("season" in name or name == "episode" or name == "episodes") and \
                        "trailer" not in name and "extra" not in name:
                         season_playlists.append(pl)
@@ -213,14 +261,11 @@ class VIDO(Service):
             if not season_playlists:
                 raise ValueError("No season playlists found for this series.")
 
-            # Sort seasons and extract season numbers
             def extract_season_number(pl):
                 name = pl["attributes"]["name"]
-                # Try to extract number after "Season"
                 match = re.search(r"season\s*(\d+)", name, re.IGNORECASE)
                 if match:
                     return int(match.group(1))
-                # If it's just "Season" or "Episodes", treat as Season 1
                 elif name.lower() in ["season", "episodes", "episode"]:
                     return 1
                 else:
@@ -234,7 +279,6 @@ class VIDO(Service):
                 playlist_id = playlist["id"]
                 season_number = extract_season_number(playlist)
                 
-                # If season_number is 0, default to 1
                 if season_number == 0:
                     season_number = 1
                 
@@ -257,7 +301,6 @@ class VIDO(Service):
 
                     for raw_ep in page_data["data"]:
                         attrs = raw_ep["attributes"]
-                        # Count episodes within the same season
                         ep_number = len([e for e in all_episodes if e.season == season_number]) + 1
                         all_episodes.append(
                             Episode(
@@ -292,8 +335,8 @@ class VIDO(Service):
             "x-device-os": "Android 15 (API 35)",
             "x-device-android-mpc": "0",
             "x-device-cpu-arch": "arm64-v8a",
-            "x-device-platform": "android",                   
-            "x-app-version": "7.14.6-e4d1de87f2-3191683",     
+            "x-device-platform": "android",
+            "x-app-version": "7.14.6-e4d1de87f2-3191683",
         })
 
         video_id = str(title.id)
@@ -303,45 +346,67 @@ class VIDO(Service):
         r.raise_for_status()
         stream = r.json()
 
-        # Safety check: ensure stream is a valid dict
         if not isinstance(stream, dict):
-            raise ValueError("Vidio returned invalid stream data (not a JSON object). "
-                             "Content may be geo-blocked, subscription-restricted, or session expired.")
-
+            raise ValueError("Vidio returned invalid stream data.")
 
+        # Extract DRM info
         custom_data = stream.get("custom_data") or {}
         license_servers = stream.get("license_servers") or {}
         widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None
         license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None
-        dash_url = stream.get("stream_dash_url")
-
-        has_valid_drm = bool(widevine_data and license_url and dash_url and isinstance(widevine_data, str))
-
-        if has_valid_drm:
+        
+        # Get stream URLs
+        dash_url = stream.get("stream_dash_url") or stream.get("stream_token_dash_url")
+        hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
+        
+        has_drm = widevine_data and license_url and dash_url and isinstance(widevine_data, str)
+        
+        if has_drm:
+            # DRM content: use DASH
             self.log.info("Widevine DRM detected, using DASH")
             self.custom_data = widevine_data
             self.license_url = license_url
             tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
+            
+        elif hls_url:
+            # Non-DRM: use HLS for video/audio
+            self.log.info("No DRM detected, using HLS for video/audio")
+            self.custom_data = None
+            self.license_url = None
+            tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
+            
+            # Clear HLS subtitles (they're segmented and incompatible)
+            if tracks.subtitles:
+                self.log.debug("Clearing HLS subtitles (incompatible format)")
+                tracks.subtitles.clear()
+            
+            # Get subtitles from DASH manifest (plain VTT)
+            if dash_url:
+                self.log.debug("Extracting subtitles from DASH manifest")
+                manual_subs = self._extract_subtitles_from_mpd(dash_url)
+                if manual_subs:
+                    for sub in manual_subs:
+                        tracks.add(sub)
+                    self.log.info(f"Added {len(manual_subs)} subtitle tracks from DASH")
+                    
+        elif dash_url:
+            # Fallback to DASH
+            self.log.warning("No HLS available, using DASH (VP9 codec)")
+            self.custom_data = None
+            self.license_url = None
+            tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
+            
+            # Try manual subtitle extraction for non-DRM DASH
+            if not tracks.subtitles:
+                manual_subs = self._extract_subtitles_from_mpd(dash_url)
+                if manual_subs:
+                    for sub in manual_subs:
+                        tracks.add(sub)
         else:
-            # Prefer HLS for non-DRM (more reliable metadata, avoids frame_rate=None)
-            self.log.info("No valid Widevine DRM, using HLS")
-            hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
-            if hls_url:
-                self.log.debug(f"HLS URL: {hls_url}")
-                tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
-            else:
-                # Last resort: non-DRM DASH (e.g., VP9), but warn user
-                dash_url = stream.get("stream_token_dash_url")
-                if dash_url:
-                    self.log.warning("HLS unavailable, falling back to non-DRM DASH (may lack frame rate metadata)")
-                    tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
-                else:
-                    raise ValueError(
-                        "No playable stream (HLS or DASH) available. "
-                        "This episode may be restricted, unavailable, or require a higher subscription tier."
-                    )
+            raise ValueError("No playable stream (DASH or HLS) available.")
 
-        self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks")
+        self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks, {len(tracks.subtitles)} subtitle tracks")
+        
         return tracks
 
     def get_chapters(self, title: Title_T) -> list[Chapter]:
@@ -376,4 +441,4 @@ class VIDO(Service):
             error_summary = response.text[:200] if response.text else "No response body"
             raise Exception(f"License request failed ({response.status_code}): {error_summary}")
 
-        return response.content
\ No newline at end of file
+        return response.content