Added subtitle support for VIDO

This commit is contained in:
FairTrade 2025-12-01 16:36:24 +01:00
parent 7385ca91a0
commit 08d937c1c1
2 changed files with 123 additions and 58 deletions

View File

@ -24,7 +24,7 @@
6. VIKI 6. VIKI
- CSRF Token is now scraped; it will come from an API request soon - CSRF Token is now scraped; it will come from an API request soon
7. VIDO 7. VIDO
- Subtitle support - Subtitles have a small quirk: the Javanese and Sundanese languages are labeled in the HLS manifest but not in the DASH one
- Search functionality not available yet - Search functionality not available yet
8. KNPY 8. KNPY
- Need to fix the search function - Need to fix the search function

View File

@ -1,18 +1,19 @@
import re import re
import uuid import uuid
import base64 import xml.etree.ElementTree as ET
from urllib.parse import urljoin
from hashlib import md5
from typing import Optional, Union from typing import Optional, Union
from http.cookiejar import CookieJar from http.cookiejar import CookieJar
from langcodes import Language from langcodes import Language
import click import click
from unshackle.core.search_result import SearchResult
from unshackle.core.credential import Credential from unshackle.core.credential import Credential
from unshackle.core.manifests import HLS, DASH from unshackle.core.manifests import HLS, DASH
from unshackle.core.service import Service from unshackle.core.service import Service
from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
from unshackle.core.tracks import Chapter, Tracks from unshackle.core.tracks import Chapter, Tracks, Subtitle
from unshackle.core.constants import AnyTrack from unshackle.core.constants import AnyTrack
from datetime import datetime, timezone from datetime import datetime, timezone
@ -20,22 +21,18 @@ from datetime import datetime, timezone
class VIDO(Service): class VIDO(Service):
""" """
Vidio.com service, Series and Movies, login required. Vidio.com service, Series and Movies, login required.
Version: 2.1.0 Version: 2.2.0
Supports URLs like: Supports URLs like:
https://www.vidio.com/premier/2978/giligilis (Series) https://www.vidio.com/premier/2978/giligilis (Series)
https://www.vidio.com/watch/7454613-marantau-short-movie (Movie) https://www.vidio.com/watch/7454613-marantau-short-movie (Movie)
Security: HD@L3 (Widevine DRM when available) Security: HD@L3 (Widevine DRM when available)
Note: Login is mandatory. Even free content requires valid session tokens
for stream access (as per API behavior).
""" """
# Updated regex to support both series and movies
TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)" TITLE_RE = r"^https?://(?:www\.)?vidio\.com/(?:premier|series|watch)/(?P<id>\d+)"
NO_SUBTITLES = True
GEOFENCE = ("ID",) GEOFENCE = ("ID",)
@staticmethod @staticmethod
@click.command(name="VIDO", short_help="https://vidio.com (login required)") @click.command(name="VIDO", short_help="https://vidio.com (login required)")
@click.argument("title", type=str) @click.argument("title", type=str)
@ -51,7 +48,6 @@ class VIDO(Service):
raise ValueError(f"Unsupported or invalid Vidio URL: {title}") raise ValueError(f"Unsupported or invalid Vidio URL: {title}")
self.content_id = match.group("id") self.content_id = match.group("id")
# Determine if it's a movie or series based on URL pattern
self.is_movie = "watch" in title self.is_movie = "watch" in title
# Static app identifiers from Android traffic # Static app identifiers from Android traffic
@ -77,10 +73,7 @@ class VIDO(Service):
self._email = credential.username self._email = credential.username
password = credential.password password = credential.password
# Define a unique key for this user's authentication tokens
cache_key = f"auth_tokens_{self._email}" cache_key = f"auth_tokens_{self._email}"
# Get a specific cache object for this key
cache = self.cache.get(cache_key) cache = self.cache.get(cache_key)
# Check if valid tokens are already in the cache # Check if valid tokens are already in the cache
@ -89,7 +82,6 @@ class VIDO(Service):
cached_data = cache.data cached_data = cache.data
self._user_token = cached_data.get("user_token") self._user_token = cached_data.get("user_token")
self._access_token = cached_data.get("access_token") self._access_token = cached_data.get("access_token")
# If tokens were successfully loaded, we're done
if self._user_token and self._access_token: if self._user_token and self._access_token:
return return
@ -120,10 +112,11 @@ class VIDO(Service):
expires_at_dt = datetime.fromisoformat(expires_at_str) expires_at_dt = datetime.fromisoformat(expires_at_str)
now_utc = datetime.now(timezone.utc) now_utc = datetime.now(timezone.utc)
expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds())) expiration_in_seconds = max(0, int((expires_at_dt - now_utc).total_seconds()))
self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes. Caching for this duration.") self.log.info(f"Token expires in {expiration_in_seconds / 60:.2f} minutes.")
except (KeyError, ValueError) as e: except (KeyError, ValueError) as e:
self.log.warning(f"Could not parse token expiration time from API: {e}. Defaulting to 1 hour.") self.log.warning(f"Could not parse token expiration: {e}. Defaulting to 1 hour.")
expiration_in_seconds = 3600 # Fallback to 1 hour expiration_in_seconds = 3600
cache.set({ cache.set({
"user_token": self._user_token, "user_token": self._user_token,
"access_token": self._access_token "access_token": self._access_token
@ -148,6 +141,66 @@ class VIDO(Service):
"content-type": "application/vnd.api+json", "content-type": "application/vnd.api+json",
} }
def _extract_subtitles_from_mpd(self, mpd_url: str) -> list[Subtitle]:
    """
    Manually parse a DASH manifest (MPD) and collect its plain WebVTT subtitles.

    Only AdaptationSets whose contentType is "text" are inspected, and within
    them only Representations whose mimeType is "text/vtt" and that list their
    files through SegmentList/SegmentURL. One Subtitle is produced per
    SegmentURL that carries a "media" attribute.

    Args:
        mpd_url: URL of the MPD manifest to download and parse.

    Returns:
        The subtitle tracks found. The list is empty when the manifest has no
        matching tracks, and holds whatever was collected so far when fetching
        or parsing fails (the failure is logged as a warning, not raised).
    """
    subtitles: list[Subtitle] = []

    try:
        r = self.session.get(mpd_url)
        r.raise_for_status()

        # Drop the default namespace so the plain tag names below match.
        mpd_content_clean = re.sub(r'\sxmlns="[^"]+"', '', r.text)
        root = ET.fromstring(mpd_content_clean)

        for adaptation_set in root.findall('.//AdaptationSet'):
            if adaptation_set.get('contentType', '') != 'text':
                continue

            lang = adaptation_set.get('lang', 'und')
            # The original author's note treats a "-auto" marker as flagging
            # auto-generated subtitles; strip it so the remainder is a
            # language tag that can be parsed.
            clean_lang = lang.replace('-auto', '')
            try:
                language = Language.get(clean_lang)
            except ValueError as e:
                # One malformed tag must not discard every other track.
                self.log.warning(
                    f"Unrecognised subtitle language {lang!r} ({e}); using 'und'."
                )
                language = Language.get('und')

            for rep in adaptation_set.findall('Representation'):
                # Handle plain VTT only (free content, per the original note).
                if rep.get('mimeType', '') != 'text/vtt':
                    continue

                segment_list = rep.find('SegmentList')
                if segment_list is None:
                    continue

                for segment_url in segment_list.findall('SegmentURL'):
                    media = segment_url.get('media')
                    if not media:
                        continue

                    # Resolve against the manifest URL itself: urljoin drops
                    # the manifest's file name and query string, which a
                    # manual split on "/" gets wrong when the query contains
                    # a slash.
                    full_url = urljoin(mpd_url, media)

                    subtitles.append(
                        Subtitle(
                            id_=md5(full_url.encode()).hexdigest()[0:16],
                            url=full_url,
                            codec=Subtitle.Codec.WebVTT,
                            language=language,
                            forced=False,
                            sdh=False,
                        )
                    )
                    self.log.debug(f"Found VTT subtitle: {lang} -> {full_url}")

    except Exception as e:
        # Best-effort: subtitle extraction must never block video/audio.
        self.log.warning(f"Failed to extract subtitles from MPD: {e}")

    return subtitles
def get_titles(self) -> Titles_T: def get_titles(self) -> Titles_T:
headers = self._headers() headers = self._headers()
@ -173,13 +226,11 @@ class VIDO(Service):
) )
]) ])
else: else:
# Fetch the main content profile
r = self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers) r = self.session.get(f"https://api.vidio.com/content_profiles/{self.content_id}", headers=headers)
r.raise_for_status() r.raise_for_status()
root = r.json()["data"] root = r.json()["data"]
series_title = root["attributes"]["title"] series_title = root["attributes"]["title"]
# Fetch all playlists (seasons + extras)
r_playlists = self.session.get( r_playlists = self.session.get(
f"https://api.vidio.com/content_profiles/{self.content_id}/playlists", f"https://api.vidio.com/content_profiles/{self.content_id}/playlists",
headers=headers headers=headers
@ -194,18 +245,15 @@ class VIDO(Service):
if group.get("type") == "season": if group.get("type") == "season":
season_playlist_ids.update(group.get("playlist_ids", [])) season_playlist_ids.update(group.get("playlist_ids", []))
# If no metadata, fall back to name-based detection
season_playlists = [] season_playlists = []
for pl in playlists_data["data"]: for pl in playlists_data["data"]:
playlist_id = int(pl["id"]) playlist_id = int(pl["id"])
name = pl["attributes"]["name"].lower() name = pl["attributes"]["name"].lower()
# Use metadata if available, otherwise use name matching
if season_playlist_ids: if season_playlist_ids:
if playlist_id in season_playlist_ids: if playlist_id in season_playlist_ids:
season_playlists.append(pl) season_playlists.append(pl)
else: else:
# Fallback: match "season" but exclude "trailer" and "extra"
if ("season" in name or name == "episode" or name == "episodes") and \ if ("season" in name or name == "episode" or name == "episodes") and \
"trailer" not in name and "extra" not in name: "trailer" not in name and "extra" not in name:
season_playlists.append(pl) season_playlists.append(pl)
@ -213,14 +261,11 @@ class VIDO(Service):
if not season_playlists: if not season_playlists:
raise ValueError("No season playlists found for this series.") raise ValueError("No season playlists found for this series.")
# Sort seasons and extract season numbers
def extract_season_number(pl): def extract_season_number(pl):
name = pl["attributes"]["name"] name = pl["attributes"]["name"]
# Try to extract number after "Season"
match = re.search(r"season\s*(\d+)", name, re.IGNORECASE) match = re.search(r"season\s*(\d+)", name, re.IGNORECASE)
if match: if match:
return int(match.group(1)) return int(match.group(1))
# If it's just "Season" or "Episodes", treat as Season 1
elif name.lower() in ["season", "episodes", "episode"]: elif name.lower() in ["season", "episodes", "episode"]:
return 1 return 1
else: else:
@ -234,7 +279,6 @@ class VIDO(Service):
playlist_id = playlist["id"] playlist_id = playlist["id"]
season_number = extract_season_number(playlist) season_number = extract_season_number(playlist)
# If season_number is 0, default to 1
if season_number == 0: if season_number == 0:
season_number = 1 season_number = 1
@ -257,7 +301,6 @@ class VIDO(Service):
for raw_ep in page_data["data"]: for raw_ep in page_data["data"]:
attrs = raw_ep["attributes"] attrs = raw_ep["attributes"]
# Count episodes within the same season
ep_number = len([e for e in all_episodes if e.season == season_number]) + 1 ep_number = len([e for e in all_episodes if e.season == season_number]) + 1
all_episodes.append( all_episodes.append(
Episode( Episode(
@ -292,8 +335,8 @@ class VIDO(Service):
"x-device-os": "Android 15 (API 35)", "x-device-os": "Android 15 (API 35)",
"x-device-android-mpc": "0", "x-device-android-mpc": "0",
"x-device-cpu-arch": "arm64-v8a", "x-device-cpu-arch": "arm64-v8a",
"x-device-platform": "android", "x-device-platform": "android",
"x-app-version": "7.14.6-e4d1de87f2-3191683", "x-app-version": "7.14.6-e4d1de87f2-3191683",
}) })
video_id = str(title.id) video_id = str(title.id)
@ -303,45 +346,67 @@ class VIDO(Service):
r.raise_for_status() r.raise_for_status()
stream = r.json() stream = r.json()
# Safety check: ensure stream is a valid dict
if not isinstance(stream, dict): if not isinstance(stream, dict):
raise ValueError("Vidio returned invalid stream data (not a JSON object). " raise ValueError("Vidio returned invalid stream data.")
"Content may be geo-blocked, subscription-restricted, or session expired.")
# Extract DRM info
custom_data = stream.get("custom_data") or {} custom_data = stream.get("custom_data") or {}
license_servers = stream.get("license_servers") or {} license_servers = stream.get("license_servers") or {}
widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None widevine_data = custom_data.get("widevine") if isinstance(custom_data, dict) else None
license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None license_url = license_servers.get("drm_license_url") if isinstance(license_servers, dict) else None
dash_url = stream.get("stream_dash_url")
# Get stream URLs
has_valid_drm = bool(widevine_data and license_url and dash_url and isinstance(widevine_data, str)) dash_url = stream.get("stream_dash_url") or stream.get("stream_token_dash_url")
hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
if has_valid_drm:
has_drm = widevine_data and license_url and dash_url and isinstance(widevine_data, str)
if has_drm:
# DRM content: use DASH
self.log.info("Widevine DRM detected, using DASH") self.log.info("Widevine DRM detected, using DASH")
self.custom_data = widevine_data self.custom_data = widevine_data
self.license_url = license_url self.license_url = license_url
tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language) tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
elif hls_url:
# Non-DRM: use HLS for video/audio
self.log.info("No DRM detected, using HLS for video/audio")
self.custom_data = None
self.license_url = None
tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
# Clear HLS subtitles (they're segmented and incompatible)
if tracks.subtitles:
self.log.debug("Clearing HLS subtitles (incompatible format)")
tracks.subtitles.clear()
# Get subtitles from DASH manifest (plain VTT)
if dash_url:
self.log.debug("Extracting subtitles from DASH manifest")
manual_subs = self._extract_subtitles_from_mpd(dash_url)
if manual_subs:
for sub in manual_subs:
tracks.add(sub)
self.log.info(f"Added {len(manual_subs)} subtitle tracks from DASH")
elif dash_url:
# Fallback to DASH
self.log.warning("No HLS available, using DASH (VP9 codec)")
self.custom_data = None
self.license_url = None
tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
# Try manual subtitle extraction for non-DRM DASH
if not tracks.subtitles:
manual_subs = self._extract_subtitles_from_mpd(dash_url)
if manual_subs:
for sub in manual_subs:
tracks.add(sub)
else: else:
# Prefer HLS for non-DRM (more reliable metadata, avoids frame_rate=None) raise ValueError("No playable stream (DASH or HLS) available.")
self.log.info("No valid Widevine DRM, using HLS")
hls_url = stream.get("stream_hls_url") or stream.get("stream_token_hls_url")
if hls_url:
self.log.debug(f"HLS URL: {hls_url}")
tracks = HLS.from_url(hls_url, session=self.session).to_tracks(language=title.language)
else:
# Last resort: non-DRM DASH (e.g., VP9), but warn user
dash_url = stream.get("stream_token_dash_url")
if dash_url:
self.log.warning("HLS unavailable, falling back to non-DRM DASH (may lack frame rate metadata)")
tracks = DASH.from_url(dash_url, session=self.session).to_tracks(language=title.language)
else:
raise ValueError(
"No playable stream (HLS or DASH) available. "
"This episode may be restricted, unavailable, or require a higher subscription tier."
)
self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks") self.log.info(f"Found {len(tracks.videos)} video tracks, {len(tracks.audio)} audio tracks, {len(tracks.subtitles)} subtitle tracks")
return tracks return tracks
def get_chapters(self, title: Title_T) -> list[Chapter]: def get_chapters(self, title: Title_T) -> list[Chapter]:
@ -376,4 +441,4 @@ class VIDO(Service):
error_summary = response.text[:200] if response.text else "No response body" error_summary = response.text[:200] if response.text else "No response body"
raise Exception(f"License request failed ({response.status_code}): {error_summary}") raise Exception(f"License request failed ({response.status_code}): {error_summary}")
return response.content return response.content