unshackle-services/MUBI/__init__.py

453 lines
18 KiB
Python
Raw Normal View History

2025-11-10 18:37:40 +01:00
import json
import re
import uuid
from http.cookiejar import CookieJar
from typing import Optional, Generator
from langcodes import Language
import base64
import click
from unshackle.core.constants import AnyTrack
from unshackle.core.manifests import DASH
from unshackle.core.service import Service
2026-02-06 17:02:24 +01:00
from unshackle.core.credential import Credential
2025-11-10 18:37:40 +01:00
from unshackle.core.titles import Episode, Movie, Movies, Title_T, Titles_T, Series
from unshackle.core.tracks import Chapter, Tracks, Subtitle
2026-02-06 17:02:24 +01:00
from unshackle.core.search_result import SearchResult
2025-11-10 18:37:40 +01:00
class MUBI(Service):
    """
    Service code for MUBI (mubi.com)
    Version: 1.2.1 (Cookie-only + Auto-UHD + Search)
    Authorization: Cookies ONLY (lt token + _mubi_session)
    Security: UHD @ L3/SL2K (Widevine/PlayReady)

    Supports:
      Series  https://mubi.com/en/nl/series/twin-peaks
      Movies  https://mubi.com/en/nl/films/the-substance
    """

    # Series URL pattern: captures `series_slug` and, when the URL ends in
    # /season/<slug>, `season_slug`.
    SERIES_TITLE_RE = r"^https?://(?:www\.)?mubi\.com(?:/[^/]+)*?/series/(?P<series_slug>[^/]+)(?:/season/(?P<season_slug>[^/]+))?$"
    # Film URL pattern: captures `slug` (anything up to '/', '?' or '#').
    TITLE_RE = r"^(?:https?://(?:www\.)?mubi\.com)(?:/[^/]+)*?/films/(?P<slug>[^/?#]+)$"
    # NOTE(review): not read anywhere in this file; presumably consumed by the
    # surrounding framework - confirm.
    NO_SUBTITLES = False
@staticmethod
@click.command(name="MUBI", short_help="https://mubi.com ")
@click.argument("title", type=str)
@click.option(
    "-c",
    "--country",
    default=None,
    type=str,
    help="With VPN set country code other than the one assigned to the account.",
)
@click.pass_context
def cli(ctx, **kwargs):
    # Command-line entry point: forwards the click context and the parsed
    # arguments (title, country) straight to the service constructor.
    # Deliberately no docstring: click would pick it up as the command's
    # help text and change the CLI output.
    service = MUBI(ctx, **kwargs)
    return service
2026-02-06 17:02:24 +01:00
def __init__(self, ctx, title: str, country: Optional[str] = None):
    """
    Set up the service for one title (or search query).

    Args:
        ctx: click context; `ctx.obj.cdm` is stored as the CDM to use.
        title: A MUBI film/series URL, or free text kept for search mode.
        country: Optional country code overriding the account's country.
            The CLI passes None when `--country` is not given, hence the
            Optional annotation and default.

    Raises:
        EnvironmentError: If no service config is available.
    """
    super().__init__(ctx)

    # Keep the raw input: when it is not a MUBI URL it is used as the search query.
    self.raw_title = title
    self.country = country

    # Only parse as URL if it matches MUBI patterns
    m_film = re.match(self.TITLE_RE, title)
    m_series = re.match(self.SERIES_TITLE_RE, title)

    self.is_series = bool(m_series)
    self.slug = m_film.group("slug") if m_film else None
    self.series_slug = m_series.group("series_slug") if m_series else None
    self.season_slug = m_series.group("season_slug") if m_series else None

    # Core state, filled in by authenticate() / get_titles() / get_tracks()
    self.film_id: Optional[int] = None
    self.lt_token: Optional[str] = None
    self.session_token: Optional[str] = None
    self.user_id: Optional[int] = None
    self.country_code: Optional[str] = None
    self.set_country_code: Optional[str] = country
    self.anonymous_user_id: Optional[str] = None
    self.default_country: Optional[str] = None
    self.reels_data: Optional[list] = None

    # ALWAYS enable UHD/HEVC path - no user flag required
    self.uhd = True
    self.cdm = ctx.obj.cdm

    if self.config is None:
        raise EnvironmentError("Missing service config for MUBI.")
def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
    """
    Authenticate against MUBI using browser cookies.

    Reads the `lt` and `_mubi_session` cookies, derives (or generates) an
    anonymous user id, installs the session headers, fetches the account
    and finally fixes the country used for requests.

    Args:
        cookies: Cookie jar holding at least `lt` and `_mubi_session`.
        credential: Passed to the base class only; credential login is
            not supported by this service.

    Raises:
        PermissionError: If cookies are missing/incomplete or the account
            request fails.
        ValueError: If the IP geolocation lookup fails (the underlying
            exception is chained as the cause).
    """
    super().authenticate(cookies, credential)

    if not cookies:
        raise PermissionError("MUBI requires login cookies (lt + _mubi_session). Credentials login is not supported.")

    # IP geolocation for country detection
    try:
        r_ip = self.session.get(self.config["endpoints"]["ip_geolocation"], timeout=5)
        r_ip.raise_for_status()
        ip_data = r_ip.json()
        if ip_data.get("country"):
            self.default_country = ip_data["country"]
            self.log.debug(f"Detected country from IP: {self.default_country}")
        else:
            self.log.warning("IP geolocation response did not contain a country code.")
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise ValueError(f"Failed to fetch IP geolocation: {e}") from e

    def find_cookie(name):
        # First cookie with this name, or None.
        return next((c for c in cookies if c.name == name), None)

    # Extract essential tokens from cookies
    lt_cookie = find_cookie("lt")
    session_cookie = find_cookie("_mubi_session")
    snow_id_cookie = find_cookie("_snow_id.c006")

    if not lt_cookie:
        raise PermissionError("Missing 'lt' cookie (Bearer token).")
    if not session_cookie:
        raise PermissionError("Missing '_mubi_session' cookie.")

    self.lt_token = lt_cookie.value
    self.session_token = session_cookie.value

    # Extract or generate anonymous_user_id: the part of the snow-id cookie
    # before the first '.', otherwise a fresh UUID.
    if snow_id_cookie and "." in snow_id_cookie.value:
        self.anonymous_user_id = snow_id_cookie.value.split(".")[0]
    else:
        self.anonymous_user_id = str(uuid.uuid4())
        self.log.warning(f"No _snow_id.c006 cookie found — generated new anonymous_user_id: {self.anonymous_user_id}")

    # Configure session headers for UHD access
    base_headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 13; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
        "Origin": "https://mubi.com",
        "Referer": "https://mubi.com/",
        "CLIENT": "web",
        "Client-Accept-Video-Codecs": "h265,vp9,h264",
        "Client-Accept-Audio-Codecs": "eac3,ac3,aac",
        "Authorization": f"Bearer {self.lt_token}",
        "ANONYMOUS_USER_ID": self.anonymous_user_id,
        # Interim value from geolocation; replaced below once the account
        # (or the --country override) is known.
        "Client-Country": self.default_country,
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "Pragma": "no-cache",
        "Cache-Control": "no-cache",
    }
    self.session.headers.update(base_headers)

    # Fetch account info
    r_account = self.session.get(self.config["endpoints"]["account"])
    if not r_account.ok:
        raise PermissionError(f"Failed to fetch MUBI account: {r_account.status_code} {r_account.text}")

    account_data = r_account.json()
    self.user_id = account_data.get("id")
    self.country_code = (account_data.get("country") or {}).get("code", "NL")

    # An explicit --country wins over the account's country.
    if self.set_country_code is not None:
        self.country_code = self.set_country_code.upper()

    self.session.headers["Client-Country"] = self.country_code
    self.GEOFENCE = (self.country_code,)
    self._bind_anonymous_user()

    self.log.info(
        f"Authenticated as user {self.user_id}, "
        f"country: {self.country_code}, "
        f"anonymous_id: {self.anonymous_user_id}"
    )
def _bind_anonymous_user(self):
try:
r = self.session.put(
self.config["endpoints"]["current_user"],
json={"anonymous_user_uuid": self.anonymous_user_id},
headers={"Content-Type": "application/json"}
)
if r.ok:
2025-11-12 11:13:45 +01:00
self.log.debug("Anonymous user ID successfully bound to account.")
2025-11-10 18:37:40 +01:00
else:
self.log.warning(f"Failed to bind anonymous_user_uuid: {r.status_code}")
except Exception as e:
self.log.warning(f"Exception while binding anonymous_user_uuid: {e}")
def get_titles(self) -> Titles_T:
    """Return the series episodes when the input was a series URL, otherwise the film."""
    fetch = self._get_series_titles if self.is_series else self._get_film_title
    return fetch()
def _get_film_title(self) -> Movies:
    """
    Fetch a film by slug and wrap it in a single-item `Movies` collection.

    Also fetches and caches the film's reels (`self.reels_data`) and stores
    the film id (`self.film_id`). The first reel's first audio track supplies
    the original language; "en" is used when none is reported.

    Raises:
        requests HTTP errors (via `raise_for_status`) if either request fails.
    """
    url = self.config["endpoints"]["film_by_slug"].format(slug=self.slug)
    r = self.session.get(url)
    r.raise_for_status()
    data = r.json()
    self.film_id = data["id"]

    # Fetch reels for language detection and subtitle names
    url_reels = self.config["endpoints"]["reels"].format(film_id=self.film_id)
    r_reels = self.session.get(url_reels)
    r_reels.raise_for_status()
    self.reels_data = r_reels.json()

    # Detect original language from first audio track
    original_language_code = "en"
    if self.reels_data and self.reels_data[0].get("audio_tracks"):
        first_audio_track = self.reels_data[0]["audio_tracks"][0]
        # A present-but-null language_code must not replace the default.
        detected_code = first_audio_track.get("language_code")
        if detected_code:
            original_language_code = detected_code
            self.log.debug(f"Detected original language from reels: '{original_language_code}'")

    # The editorial field may be missing or null; treat both as empty text,
    # then drop the simple <p>/<em> markup.
    description = data.get("default_editorial_html") or ""
    for tag in ("<p>", "</p>", "<em>", "</em>"):
        description = description.replace(tag, "")
    description = description.strip()

    year = data.get("year")
    name = data.get("title", "Unknown")

    movie = Movie(
        id_=self.film_id,
        service=self.__class__,
        name=name,
        year=year,
        description=description,
        language=Language.get(original_language_code),
        data=data,
    )
    return Movies([movie])
def _get_series_titles(self) -> Titles_T:
    """
    Collect the episodes of a series into a `Series`, ordered by season and number.

    With a season slug in the URL only that season is fetched, and a 404 for
    it is an error. Without one, every season listed on the series is
    fetched; seasons that return 404 or contain no episodes are skipped
    with an info log.

    Raises:
        ValueError: Requested season not found, or the series lists no seasons.
    """
    endpoints = self.config["endpoints"]

    series_resp = self.session.get(endpoints["series"].format(series_slug=self.series_slug))
    series_resp.raise_for_status()
    series_info = series_resp.json()

    collected = []

    if not self.season_slug:
        all_seasons = series_info.get("seasons", [])
        if not all_seasons:
            raise ValueError("No seasons found for this series.")

        for entry in all_seasons:
            slug = entry["slug"]
            season_url = endpoints["season_episodes"].format(
                series_slug=self.series_slug,
                season_slug=slug,
            )
            self.log.debug(f"Fetching episodes for season: {slug}")
            season_resp = self.session.get(season_url)
            if season_resp.status_code == 404:
                self.log.info(f"Season '{slug}' not available, skipping.")
                continue
            season_resp.raise_for_status()

            season_eps = season_resp.json().get("episodes", [])
            if season_eps:
                self._add_episodes_to_list(collected, season_eps, series_info)
            else:
                self.log.info(f"No episodes found in season '{slug}'.")
    else:
        season_url = endpoints["season_episodes"].format(
            series_slug=self.series_slug,
            season_slug=self.season_slug,
        )
        season_resp = self.session.get(season_url)
        if season_resp.status_code == 404:
            raise ValueError(f"Season '{self.season_slug}' not found.")
        season_resp.raise_for_status()
        self._add_episodes_to_list(collected, season_resp.json().get("episodes", []), series_info)

    collected.sort(key=lambda ep: (ep.season, ep.number))
    return Series(collected)
def _add_episodes_to_list(self, episodes_list: list, episodes_data: list, series_data: dict):
    """
    Convert raw episode dicts into `Episode` objects and append them in place.

    Args:
        episodes_list: Output list; one `Episode` is appended per entry.
        episodes_data: Episode dicts from the season-episodes response.
        series_data: Series dict; its "title" becomes each episode's title.
    """
    for ep in episodes_data:
        # "consumable" / "playback_languages" may be absent or null; treat
        # both the same instead of failing on None.
        playback_langs = (ep.get("consumable") or {}).get("playback_languages") or {}
        audio_langs = playback_langs.get("audio_options", ["English"])

        # First word of the first audio option, lower-cased; "en" when there
        # is nothing usable.
        # NOTE(review): the default suggests these are display names
        # ("English") rather than language tags - confirm Language.get
        # resolves them as intended.
        first_option_words = audio_langs[0].split() if audio_langs else []
        lang_code = first_option_words[0].lower() if first_option_words else "en"

        try:
            detected_lang = Language.get(lang_code)
        except Exception:
            # Best-effort detection: an unparseable value falls back to
            # English, without also swallowing KeyboardInterrupt/SystemExit.
            detected_lang = Language.get("en")

        episodes_list.append(Episode(
            id_=ep["id"],
            service=self.__class__,
            title=series_data["title"],
            season=ep["episode"]["season_number"],
            number=ep["episode"]["number"],
            name=ep["title"],
            description=ep.get("short_synopsis", ""),
            language=detected_lang,
            data=ep,
        ))
def get_tracks(self, title: Title_T) -> Tracks:
    """
    Build the tracks for a title: video/audio from the DASH manifest,
    subtitles from the secure-URL response.

    The manifest URL is rewritten to the HEVC (`ver1.hevc.ex-vtt`) variant
    before parsing. Subtitles parsed from the manifest are replaced by the
    WebVTT tracks listed under `text_track_urls`, named after the matching
    text track of the first reel when available.

    Args:
        title: Title whose `id` is the MUBI film id and whose `language`
            is the original language.

    Returns:
        The populated `Tracks` object.

    Raises:
        RuntimeError: If the title has no id.
        ValueError: If the response lists no DASH manifest.
    """
    film_id = getattr(title, "id", None)
    if not film_id:
        raise RuntimeError("Title ID not found.")

    endpoints = self.config["endpoints"]

    # Initiate viewing session. Only the success of the call matters here;
    # the response body is not used.
    r_view = self.session.post(
        endpoints["initiate_viewing"].format(film_id=film_id),
        json={},
        headers={"Content-Type": "application/json"},
    )
    r_view.raise_for_status()

    # The reels cache is only valid for the film it was fetched for.
    # Refetch when the title changes (e.g. the next episode of a series),
    # otherwise subtitle names would come from a different film.
    if self.film_id != film_id or not self.reels_data:
        r_reels = self.session.get(endpoints["reels"].format(film_id=film_id))
        r_reels.raise_for_status()
        self.reels_data = r_reels.json()
        self.film_id = film_id

    # Display names of the first reel's text tracks, keyed by track id
    # (first occurrence wins).
    reels = self.reels_data or []
    display_names = {}
    for text_track in (reels[0].get("text_tracks") or []) if reels else []:
        display_names.setdefault(text_track.get("id"), text_track.get("display_name"))

    # Get secure streaming URL
    r_secure = self.session.get(endpoints["secure_url"].format(film_id=film_id))
    r_secure.raise_for_status()
    secure_data = r_secure.json()

    # Find DASH manifest URL (first entry with the DASH content type)
    manifest_url = next(
        (
            entry["src"]
            for entry in secure_data.get("urls", [])
            if entry.get("content_type") == "application/dash+xml"
        ),
        None,
    )
    if not manifest_url:
        raise ValueError("No DASH manifest URL found.")

    # Force the HEVC manifest variant.
    manifest_url = re.sub(
        r'/default/ver1\.AVC1\.[^/]*\.mpd',
        '/default/ver1.hevc.ex-vtt.mpd',
        manifest_url
    )
    # Fallback for non-AVC URLs
    if '/default/ver1.hevc.ex-vtt.mpd' not in manifest_url:
        manifest_url = re.sub(
            r'/default/[^/]*\.mpd',
            '/default/ver1.hevc.ex-vtt.mpd',
            manifest_url
        )

    # Parse DASH manifest
    tracks = DASH.from_url(manifest_url, session=self.session).to_tracks(language=title.language)

    # Add enhanced subtitles (forced/SDH detection)
    subtitles = []
    for sub in secure_data.get("text_track_urls", []):
        vtt_url = sub.get("url")
        if not vtt_url:
            continue

        lang_code = sub.get("language_code") or "und"
        role = sub.get("role")

        # Prefer the reel's display name; otherwise "<role> <LANG>"
        # (role may be missing or null).
        disp_name = display_names.get(sub.get("id"))
        if not disp_name:
            disp_name = f"{role or ''} {lang_code.upper()}".strip()

        forced = role == "forced-subtitle"
        # The SDH marker moves from the name into the flag, so it is never
        # lost even when the role is not "caption".
        sdh = role == "caption" or "(SDH)" in disp_name
        if "(SDH)" in disp_name:
            disp_name = disp_name.replace("(SDH)", "").strip()

        is_original = lang_code == title.language.language
        subtitles.append(
            Subtitle(
                id_=sub["id"],
                url=vtt_url,
                language=Language.get(lang_code),
                is_original_lang=is_original,
                codec=Subtitle.Codec.WebVTT,
                name=disp_name,
                forced=forced,
                sdh=sdh,
            )
        )

    tracks.subtitles = subtitles
    return tracks
2026-02-06 17:02:24 +01:00
def search(self) -> Generator[SearchResult, None, None]:
    """
    Query the configured search endpoint with the raw title text.

    Requests the first page (24 results, playable only) and yields one
    `SearchResult` labelled "MOVIE" per returned film, titled
    "<title> (<year>)".
    """
    query = {
        "query": self.raw_title,
        "page": 1,
        "per_page": 24,
        "playable": "true",
        "all_films_on_zero_hits": "true"
    }
    resp = self.session.get(url=self.config["endpoints"]["search"], params=query)
    resp.raise_for_status()

    for film in resp.json().get("films", []):
        heading = f"{film['title']} ({film['year']})"
        yield SearchResult(
            id_=film["id"],
            title=heading,
            label="MOVIE",
            url=film["web_url"].rstrip(),  # drop trailing whitespace
        )
2025-11-10 18:37:40 +01:00
def get_chapters(self, title: Title_T) -> list[Chapter]:
    """Return an empty chapter list; this service produces no chapter markers."""
    no_chapters: list = []
    return no_chapters
def get_widevine_license(self, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
    """
    Request a Widevine license for the given challenge.

    The user id and `lt` token are sent base64-encoded in the
    `dt-custom-data` header. The JSON response must report status "OK";
    its base64 `license` field is decoded and returned.

    Raises:
        RuntimeError: If no user id is set (authenticate() not run).
        PermissionError: If the license server reports a non-OK status.
    """
    if not self.user_id:
        raise RuntimeError("user_id not set — authenticate first.")

    # Cookie-based license request (NO dtinfo - credentials removed)
    custom_data_json = json.dumps({
        "userId": self.user_id,
        "sessionId": self.lt_token,
        "merchant": "mubi"
    })
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Linux; Android 13; SM-G975F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
        "Accept": "*/*",
        "Origin": "https://mubi.com",
        "Referer": "https://mubi.com/",
        "dt-custom-data": base64.b64encode(custom_data_json.encode()).decode(),
    }

    response = self.session.post(
        self.config["endpoints"]["license"],
        data=challenge,
        headers=request_headers,
    )
    response.raise_for_status()

    payload = response.json()
    if payload.get("status") == "OK":
        return base64.b64decode(payload["license"])
    raise PermissionError(f"DRM license error: {payload}")
2025-11-12 11:13:45 +01:00
2026-02-06 17:02:24 +01:00
def get_playready_license(self, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
    """
    Request a PlayReady license for the given challenge.

    The user id and `lt` token are sent base64-encoded in the
    `dt-custom-data` header; the raw response body is returned as the
    license.

    Raises:
        RuntimeError: If no user id is set (authenticate() not run).
        PermissionError: If the server answers with a non-200 status that
            `raise_for_status` lets through; the status code is included
            in the message.
    """
    if not self.user_id:
        raise RuntimeError("user_id not set — authenticate first.")

    # Cookie-based PlayReady license request (NO dtinfo - credentials removed)
    dt_custom_data = {
        "userId": self.user_id,
        "sessionId": self.lt_token,
        "merchant": "mubi"
    }
    dt_custom_data_b64 = base64.b64encode(json.dumps(dt_custom_data).encode()).decode()
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0",
        "Accept": "*/*",
        "Origin": "https://mubi.com",
        "Referer": "https://mubi.com/",
        "dt-custom-data": dt_custom_data_b64,
    }
    r = self.session.post(
        self.config["endpoints"]["license_pr"],
        data=challenge,
        headers=headers,
    )
    r.raise_for_status()
    # raise_for_status only covers 4xx/5xx; anything else that is not a
    # plain 200 carries no usable license either.
    if r.status_code != 200:
        raise PermissionError(f"DRM license error: unexpected HTTP status {r.status_code}")
    return r.content