Added subtitle support for VIDO

This commit is contained in:
FairTrade 2025-12-01 16:36:24 +01:00
parent 7385ca91a0
commit 08d937c1c1
2 changed files with 123 additions and 58 deletions

View File

@ -24,7 +24,7 @@
6. VIKI
- CSRF token is now scraped; it will be fetched via an API request soon
7. VIDO
- Subtitle support
- Subtitles have a small quirk: the Javanese and Sundanese languages are labeled in the HLS manifest but not in the DASH one
- Search functionality not available yet
8. KNPY
- Need to fix the search function

View File

@ -1,18 +1,19 @@
import re
import uuid
import base64
import xml.etree.ElementTree as ET
from urllib.parse import urljoin
from hashlib import md5
from typing import Optional, Union
from http.cookiejar import CookieJar
from langcodes import Language
import click
from unshackle.core.search_result import SearchResult
from unshackle.core.credential import Credential
from unshackle.core.manifests import HLS, DASH
from unshackle.core.service import Service
from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
from unshackle.core.tracks import Chapter, Tracks
from unshackle.core.tracks import Chapter, Tracks, Subtitle
from unshackle.core.constants import AnyTrack
from datetime import datetime, timezone
@ -20,22 +21,18 @@ from datetime import datetime, timezone
class VIDO(Service):
"""
Vidio.com service, Series and Movies, login required.
Version: 2.1.0
Version: 2.2.0
Supports URLs like:
https://www.vidio.com/premier/2978/giligilis (Series)
https://www.vidio.com/watch/7454613-marantau-short-movie (Movie)
Security: HD@L3 (Widevine DRM when available)
Note: Login is mandatory. Even free content requires valid session tokens
for stream access (as per API behavior).
"""
# Updated regex to support both series and movies
TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)"
NO_SUBTITLES = True
GEOFENCE = ("ID",)
@staticmethod
@click.command(name="VIDO", short_help="https://vidio.com (login required)")
@click.argument("title", type=str)
@ -51,7 +48,6 @@ class VIDO(Service):
raise ValueError(f"Unsupported or invalid Vidio URL: {title}")
self.content_id = match.group("id")
# Determine if it's a movie or series based on URL pattern
self.is_movie = "watch" in title
# Static app identifiers from Android traffic
@ -77,10 +73,7 @@ class VIDO(Service):
self._email = credential.username
password = credential.password
# Define a unique key for this user's authentication tokens
cache_key = f"auth_tokens_{self._email}"
# Get a specific cache object for this key
cache = self.cache.get(cache_key)
# Check if valid tokens are already in the cache
@ -89,7 +82,6 @@ class VIDO(Service):
cached_data = cache.data
self._user_token = cached_data.get("user_token")
self._access_token = cached_data.get("access_token")
# If tokens were successfully loaded, we're done
if self._user_token and self._access_token:
return
@ -120,10 +112,11 @@ class VIDO(Service):
expires_at_dt = datetime.fromisoformat(expires_at_str)
now_utc = datetime.now(timezone.utc)
expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds()))
self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes. Caching for this duration.")
self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes.")
except (KeyError, ValueError) as e:
self.log.warning(f"Could not parse token expiration time from API: {e}. Defaulting to 1 hour.")
expiration_in_seconds = 3600 # Fallback to 1 hour
self.log.warning(f"Could not parse token expiration: {e}. Defaulting to 1 hour.")
expiration_in_seconds = 3600
cache.set({
"user_token": self._user_token,
"access_token": self._access_token
@ -148,6 +141,66 @@ class VIDO(Service):
"content-type": "application/vnd.api+json",
}
def _extract_subtitles_from_mpd(self, mpd_url: str) -> list[Subtitle]:
    """
    Download a DASH manifest and collect its plain WebVTT subtitle tracks.

    Only ``AdaptationSet`` elements with ``contentType="text"`` are inspected,
    and within them only ``Representation`` elements whose ``mimeType`` is
    ``text/vtt`` and that contain a ``SegmentList``. Every ``SegmentURL`` with
    a non-empty ``media`` attribute produces one ``Subtitle``.

    Extraction is best-effort: any exception (request failure, malformed XML,
    unparsable language tag, ...) is logged as a warning and whatever was
    collected up to that point is returned.

    Args:
        mpd_url: URL of the MPD manifest. Relative ``media`` paths are
            resolved against the manifest's directory.

    Returns:
        The subtitle tracks found; empty when there are none or when
        extraction failed before any track was collected.
    """
    subtitles: list[Subtitle] = []

    try:
        r = self.session.get(mpd_url)
        r.raise_for_status()

        # Relative media paths are resolved against the manifest's directory.
        base_url = mpd_url.rsplit('/', 1)[0] + '/'

        # Drop the default namespace declaration so the plain tag names used
        # in find()/findall() below match.
        root = ET.fromstring(re.sub(r'\sxmlns="[^"]+"', '', r.text))

        for adaptation_set in root.findall('.//AdaptationSet'):
            if adaptation_set.get('contentType', '') != 'text':
                continue

            lang = adaptation_set.get('lang', 'und')
            # A "-auto" marker in the lang attribute (presumably flagging
            # auto-generated subtitles -- TODO confirm) is removed before the
            # tag is handed to Language.get(). Computed once per adaptation
            # set rather than once per segment.
            clean_lang = lang.replace('-auto', '')

            for rep in adaptation_set.findall('Representation'):
                # Only plain VTT representations are handled here.
                if rep.get('mimeType', '') != 'text/vtt':
                    continue

                segment_list = rep.find('SegmentList')
                if segment_list is None:
                    continue

                for segment_url in segment_list.findall('SegmentURL'):
                    media = segment_url.get('media')
                    if not media:
                        continue

                    full_url = urljoin(base_url, media)
                    subtitles.append(
                        Subtitle(
                            # Track id: first 16 hex chars of the URL's MD5.
                            id_=md5(full_url.encode()).hexdigest()[0:16],
                            url=full_url,
                            codec=Subtitle.Codec.WebVTT,
                            language=Language.get(clean_lang),
                            forced=False,
                            sdh=False,
                        )
                    )
                    self.log.debug(f"Found VTT subtitle: {lang} -> {full_url}")

    except Exception as e:
        # Deliberately broad: subtitles are optional, so a failure here must
        # not abort track discovery for the title.
        self.log.warning(f"Failed to extract subtitles from MPD: {e}")

    return subtitles
def get_titles(self) -> Titles_T:
headers = self._headers()
@ -173,13 +226,11 @@ class VIDO(Service):
)
])
else:
# Fetch the main content profile
r = self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers)
r.raise_for_status()
root = r.json()["data"]
series_title = root["attributes"]["title"]
# Fetch all playlists (seasons + extras)
r_playlists = self.session.get(
f"https://api.vidio.com/content_profiles/{self.content_id}/playlists",
headers=headers
@ -194,18 +245,15 @@ class VIDO(Service):
if group.get("type") == "season":
season_playlist_ids.update(group.get("playlist_ids", []))
# If no metadata, fall back to name-based detection
season_playlists = []
for pl in playlists_data["data"]:
playlist_id = int(pl["id"])
name = pl["attributes"]["name"].lower()
# Use metadata if available, otherwise use name matching
if season_playlist_ids:
if playlist_id in season_playlist_ids:
season_playlists.append(pl)
else:
# Fallback: match "season" but exclude "trailer" and "extra"
if ("season" in name or name == "episode" or name == "episodes") and \
"trailer" not in name and "extra" not in name:
season_playlists.append(pl)
@ -213,14 +261,11 @@ class VIDO(Service):
if not season_playlists:
raise ValueError("No season playlists found for this series.")
# Sort seasons and extract season numbers
def extract_season_number(pl):
name = pl["attributes"]["name"]
# Try to extract number after "Season"
match = re.search(r"season\s*(\d+)", name, re.IGNORECASE)
if match:
return int(match.group(1))
# If it's just "Season" or "Episodes", treat as Season 1
elif name.lower() in ["season", "episodes", "episode"]:
return 1
else:
@ -234,7 +279,6 @@ class VIDO(Service):
playlist_id = playlist["id"]
season_number = extract_season_number(playlist)
# If season_number is 0, default to 1
if season_number == 0:
season_number = 1
@ -257,7 +301,6 @@ class VIDO(Service):
for raw_ep in page_data["data"]:
attrs = raw_ep["attributes"]
# Count episodes within the same season
ep_number = len([e for e in all_episodes if e.season == season_number]) + 1
all_episodes.append(
Episode(
@ -292,8 +335,8 @@ class VIDO(Service):
"x-device-os": "Android 15 (API 35)",
"x-device-android-mpc": "0",
"x-device-cpu-arch": "arm64-v8a",
"x-device-platform": "android",
"x-app-version": "7.14.6-e4d1de87f2-3191683",
"x-device-platform": "android",
"x-app-version": "7.14.6-e4d1de87f2-3191683",
})
video_id = str(title.id)
@ -303,45 +346,67 @@ class VIDO(Service):
r.raise_for_status()
stream = r.json()
# Safety check: ensure stream is a valid dict
if not isinstance(stream, dict):
raise ValueError("Vidio returned invalid stream data (not a JSON object). "
"Content may be geo-blocked, subscription-restricted, or session expired.")
raise ValueError("Vidio returned invalid stream data.")
# Extract DRM info
custom_data = stream.get("custom_data") or {}
license_servers = stream.get("license_servers") or {}
widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None
license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None
dash_url = stream.get("stream_dash_url")
has_valid_drm = bool(widevine_data and license_url and dash_url and isinstance(widevine_data, str))
if has_valid_drm:
# Get stream URLs
dash_url = stream.get("stream_dash_url") or stream.get("stream_token_dash_url")
hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
has_drm = widevine_data and license_url and dash_url and isinstance(widevine_data, str)
if has_drm:
# DRM content: use DASH
self.log.info("Widevine DRM detected, using DASH")
self.custom_data = widevine_data
self.license_url = license_url
tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
elif hls_url:
# Non-DRM: use HLS for video/audio
self.log.info("No DRM detected, using HLS for video/audio")
self.custom_data = None
self.license_url = None
tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
# Clear HLS subtitles (they're segmented and incompatible)
if tracks.subtitles:
self.log.debug("Clearing HLS subtitles (incompatible format)")
tracks.subtitles.clear()
# Get subtitles from DASH manifest (plain VTT)
if dash_url:
self.log.debug("Extracting subtitles from DASH manifest")
manual_subs = self._extract_subtitles_from_mpd(dash_url)
if manual_subs:
for sub in manual_subs:
tracks.add(sub)
self.log.info(f"Added {len(manual_subs)} subtitle tracks from DASH")
elif dash_url:
# Fallback to DASH
self.log.warning("No HLS available, using DASH (VP9 codec)")
self.custom_data = None
self.license_url = None
tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
# Try manual subtitle extraction for non-DRM DASH
if not tracks.subtitles:
manual_subs = self._extract_subtitles_from_mpd(dash_url)
if manual_subs:
for sub in manual_subs:
tracks.add(sub)
else:
# Prefer HLS for non-DRM (more reliable metadata, avoids frame_rate=None)
self.log.info("No valid Widevine DRM, using HLS")
hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
if hls_url:
self.log.debug(f"HLS URL: {hls_url}")
tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
else:
# Last resort: non-DRM DASH (e.g., VP9), but warn user
dash_url = stream.get("stream_token_dash_url")
if dash_url:
self.log.warning("HLS unavailable, falling back to non-DRM DASH (may lack frame rate metadata)")
tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
else:
raise ValueError(
"No playable stream (HLS or DASH) available. "
"This episode may be restricted, unavailable, or require a higher subscription tier."
)
raise ValueError("No playable stream (DASH or HLS) available.")
self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks")
self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks, {len(tracks.subtitles)} subtitle tracks")
return tracks
def get_chapters(self, title: Title_T) -> list[Chapter]:
@ -376,4 +441,4 @@ class VIDO(Service):
error_summary = response.text[:200] if response.text else "No response body"
raise Exception(f"License request failed ({response.status_code}): {error_summary}")
return response.content
return response.content