# Forked from FairTrade/unshackle-services
import base64
|
|
import json
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from http.cookiejar import CookieJar
|
|
from typing import List, Optional
|
|
|
|
import click
|
|
import jwt
|
|
from langcodes import Language
|
|
|
|
from unshackle.core.constants import AnyTrack
|
|
from unshackle.core.credential import Credential
|
|
from unshackle.core.manifests import DASH
|
|
from unshackle.core.search_result import SearchResult
|
|
from unshackle.core.service import Service
|
|
from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
|
|
from unshackle.core.tracks import Subtitle, Tracks
|
|
|
|
|
|
class KNPY(Service):
    """
    Service code for Kanopy (kanopy.com).
    Version: 1.0.0

    Auth: Credential (username + password)
    Security: FHD@L3

    Handles both Movies ("video" content) and Series ("playlist" content).
    The login JWT is cached together with the username it belongs to.

    NOTE(review): the original header also stated that movies requiring
    tickets are detected and stopped. No explicit ticket check is visible in
    this class; presumably the plays endpoint rejects such requests with an
    HTTP error. TODO confirm.
    """

    # Matches kanopy.com URLs (with an optional "www." prefix) whose final
    # path segment is the numeric content ID.
    TITLE_RE = r"^https?://(?:www\.)?kanopy\.com/.+/(?P<id>\d+)$"
    GEOFENCE = ()
    NO_SUBTITLES = False

    # click entry point: builds the service from the CLI context and arguments.
    # Deliberately documented with a comment rather than a docstring, because
    # click would use a docstring as the command's help text.
    @staticmethod
    @click.command(name="KNPY", short_help="https://kanopy.com")
    @click.argument("title", type=str)
    @click.pass_context
    def cli(ctx, **kwargs):
        return KNPY(ctx, **kwargs)

    def __init__(self, ctx, title: str):
        """
        Set up the service from the click context and the requested title.

        Parameters:
            ctx: click context; `ctx.obj.cdm` is stored as `self.cdm`.
            title: A Kanopy URL matching TITLE_RE. Any other value is stored
                as `self.search_query` and no content ID is set.

        Raises:
            ValueError: If no service configuration is available.
        """
        super().__init__(ctx)
        if not self.config:
            raise ValueError("KNPY configuration not found. Ensure config.yaml exists.")

        self.cdm = ctx.obj.cdm

        match = re.match(self.TITLE_RE, title)
        if match:
            self.content_id = match.group("id")
        else:
            self.content_id = None
            self.search_query = title

        self.API_VERSION = self.config["client"]["api_version"]
        self.USER_AGENT = self.config["client"]["user_agent"]
        self.WIDEVINE_UA = self.config["client"]["widevine_ua"]

        self.session.headers.update({
            "x-version": self.API_VERSION,
            "user-agent": self.USER_AGENT
        })

        # Filled in by authenticate() / get_tracks().
        self._jwt = None
        self._visitor_id = None
        self._user_id = None
        self._domain_id = None
        self.widevine_license_url = None

    def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
        """
        Authenticate against Kanopy, reusing a cached token when possible.

        A cached token is only reused when it was stored for the same
        username. Otherwise a handshake + login is performed and the new
        token is cached.

        Parameters:
            cookies: Accepted for interface compatibility; not used here.
            credential: Must carry both a username (email) and a password.

        Raises:
            ValueError: If the credential is missing or incomplete, or if the
                user has no library memberships.
        """
        if not credential or not credential.username or not credential.password:
            raise ValueError("Kanopy requires email and password for authentication.")

        cache = self.cache.get("auth_token")

        cached_token = self._get_cached_token(cache, credential)
        if cached_token and self._resume_from_token(cached_token):
            return

        self._login(credential)
        self._fetch_user_details()
        self._cache_token(cache, credential)

    def _get_cached_token(self, cache, credential: Credential) -> Optional[str]:
        """Return the cached token if it is unexpired and belongs to `credential.username`, else None."""
        if not (cache and not cache.expired):
            return None

        cached_data = cache.data
        if isinstance(cached_data, dict) and "token" in cached_data:
            if cached_data.get("username") == credential.username:
                self.log.info("Using cached authentication token")
                return cached_data["token"]
            self.log.info(f"Cached token belongs to '{cached_data.get('username')}', but logging in as '{credential.username}'. Re-authenticating.")
        elif isinstance(cached_data, str):
            # A bare string carries no username, so it cannot be matched to the credential.
            self.log.info("Found legacy cached token format. Re-authenticating to ensure correct user.")
        return None

    def _resume_from_token(self, token: str) -> bool:
        """
        Install `token` on the session and recover the user identifiers from it.

        Returns:
            True when the session is ready to use, False when the token could
            not be decoded and a fresh login is required.
        """
        self._jwt = token
        self.session.headers.update({"authorization": f"Bearer {self._jwt}"})

        # Everything is already known (e.g. authenticate() called twice): nothing left to do.
        if self._user_id and self._domain_id and self._visitor_id:
            return True

        try:
            # The signature is not verified; the token is only read for its claims.
            decoded_jwt = jwt.decode(self._jwt, options={"verify_signature": False})
            self._user_id = decoded_jwt["data"]["uid"]
            self._visitor_id = decoded_jwt["data"]["visitor_id"]
        except (KeyError, TypeError, jwt.DecodeError) as e:
            self.log.error(f"Could not decode cached token: {e}. Re-authenticating.")
            # Do not send the unusable token along with the handshake request.
            self.session.headers.pop("authorization", None)
            return False

        self.log.info("Extracted user_id and visitor_id from cached token.")
        self._fetch_user_details()
        return True

    def _login(self, credential: Credential) -> None:
        """Perform the visitor handshake followed by an email/password login."""
        self.log.info("Performing handshake to get visitor token...")
        r = self.session.get(self.config["endpoints"]["handshake"])
        r.raise_for_status()
        handshake_data = r.json()
        self._visitor_id = handshake_data["visitorId"]
        initial_jwt = handshake_data["jwt"]

        self.log.info(f"Logging in as {credential.username}...")
        login_payload = {
            "credentialType": "email",
            "emailUser": {
                "email": credential.username,
                "password": credential.password
            }
        }
        # The login request is authorised with the visitor JWT from the handshake.
        r = self.session.post(
            self.config["endpoints"]["login"],
            json=login_payload,
            headers={"authorization": f"Bearer {initial_jwt}"}
        )
        r.raise_for_status()
        login_data = r.json()
        self._jwt = login_data["jwt"]
        self._user_id = login_data["userId"]

        self.session.headers.update({"authorization": f"Bearer {self._jwt}"})
        self.log.info(f"Successfully authenticated as {credential.username}")

    def _cache_token(self, cache, credential: Credential) -> None:
        """Cache the current JWT with its username, expiring with the token's `exp` claim (1 hour fallback)."""
        cache_payload = {
            "token": self._jwt,
            "username": credential.username
        }
        expiration = 3600
        try:
            decoded_jwt = jwt.decode(self._jwt, options={"verify_signature": False})
            exp_timestamp = decoded_jwt.get("exp")
            if exp_timestamp:
                expiration = int(exp_timestamp - datetime.now(timezone.utc).timestamp())
                self.log.info(f"Caching token for {expiration / 60:.2f} minutes.")
            else:
                self.log.warning("JWT has no 'exp' claim, caching for 1 hour as a fallback.")
        except Exception as e:
            # Best effort: an unreadable token is still cached, just for a fixed period.
            expiration = 3600
            self.log.error(f"Failed to decode JWT for caching: {e}. Caching for 1 hour as a fallback.")

        cache.set(data=cache_payload, expiration=expiration)

    def _fetch_user_details(self):
        """
        Look up the user's library memberships and choose the domain to use.

        Preference order: the active default membership, then the first
        active membership, then (as a last resort) the first membership
        listed.

        Raises:
            ValueError: If the user has no memberships at all.
        """
        self.log.info("Fetching user library memberships...")
        r = self.session.get(self.config["endpoints"]["memberships"].format(user_id=self._user_id))
        r.raise_for_status()
        membership_list = r.json().get("list") or []

        active = [m for m in membership_list if m.get("status") == "active"]

        default = next((m for m in active if m.get("isDefault", False)), None)
        if default is not None:
            self._domain_id = str(default["domainId"])
            self.log.info(f"Using default library domain: {default.get('sitename', 'Unknown')} (ID: {self._domain_id})")
            return

        if active:
            self._domain_id = str(active[0]["domainId"])
            self.log.warning(f"No default library found. Using first active domain: {self._domain_id}")
            return

        if membership_list:
            # Keep the previous best-effort behaviour, but say what is really happening.
            self._domain_id = str(membership_list[0]["domainId"])
            self.log.warning(f"No active library membership found. Falling back to first listed domain: {self._domain_id}")
            return

        raise ValueError("No active library memberships found for this user.")

    def _parse_language(self, data: dict) -> Language:
        """Return the first entry of `data["languages"]` as a Language, defaulting to English."""
        langs = data.get("languages", [])
        if isinstance(langs, list) and langs:
            try:
                return Language.find(langs[0])
            except Exception as e:
                # Best effort: an unknown or malformed language name must not abort title parsing.
                self.log.debug(f"Could not resolve language {langs[0]!r}: {e}")
        return Language.get("en")

    def get_titles(self) -> Titles_T:
        """
        Fetch metadata for the requested content ID.

        Returns:
            Movies with a single Movie for "video" content, or a Series with
            one Episode per video item for "playlist" content.

        Raises:
            ValueError: If no content ID or domain ID is set, or the content
                type is neither "video" nor "playlist".
        """
        if not self.content_id:
            raise ValueError("A content ID is required to get titles. Use a URL or run a search first.")
        if not self._domain_id:
            raise ValueError("Domain ID not set. Authentication may have failed.")

        r = self.session.get(self.config["endpoints"]["video_info"].format(video_id=self.content_id, domain_id=self._domain_id))
        r.raise_for_status()
        content_data = r.json()

        content_type = content_data.get("type")

        if content_type == "video":
            video_data = content_data["video"]
            movie = Movie(
                id_=str(video_data["videoId"]),
                service=self.__class__,
                name=video_data["title"],
                year=video_data.get("productionYear"),
                description=video_data.get("descriptionHtml", ""),
                language=self._parse_language(video_data),
                data=video_data,
            )
            return Movies([movie])

        if content_type == "playlist":
            playlist_data = content_data["playlist"]
            series_title = playlist_data["title"]
            series_year = playlist_data.get("productionYear")

            # The season number is taken from the playlist title when present ("Season 2", "S2").
            season_match = re.search(r'(?:Season|S)\s*(\d+)', series_title, re.IGNORECASE)
            season_num = int(season_match.group(1)) if season_match else 1

            r = self.session.get(self.config["endpoints"]["video_items"].format(video_id=self.content_id, domain_id=self._domain_id))
            r.raise_for_status()
            items_data = r.json()

            episodes = []
            for item in items_data.get("list", []):
                if item.get("type") != "video":
                    continue

                video_data = item["video"]

                # Default to the position among video items only, so skipped
                # non-video entries do not leave gaps in the numbering.
                ep_num = len(episodes) + 1

                # An explicit "Episode N" / "Ep. N" in the title takes precedence.
                ep_title = video_data.get("title", "")
                ep_match = re.search(r'Ep(?:isode)?\.?\s*(\d+)', ep_title, re.IGNORECASE)
                if ep_match:
                    ep_num = int(ep_match.group(1))

                episodes.append(
                    Episode(
                        id_=str(video_data["videoId"]),
                        service=self.__class__,
                        title=series_title,
                        season=season_num,
                        number=ep_num,
                        name=video_data["title"],
                        description=video_data.get("descriptionHtml", ""),
                        year=video_data.get("productionYear", series_year),
                        language=self._parse_language(video_data),
                        data=video_data,
                    )
                )

            series = Series(episodes)
            series.name = series_title
            series.description = playlist_data.get("descriptionHtml", "")
            series.year = series_year
            return series

        raise ValueError(f"Unsupported content type: {content_type}")

    def get_tracks(self, title: Title_T) -> Tracks:
        """
        Start a play session for `title` and build its tracks.

        The first DASH manifest in the play response is used. Its `drmType`
        determines `self.widevine_license_url`, which get_widevine_license()
        uses later. WebVTT captions from the play response are added as
        subtitle tracks.

        Raises:
            ValueError: If no DASH manifest is offered or no license URL
                could be built (unknown `drmType`).
        """
        play_payload = {
            "videoId": int(title.id),
            "domainId": int(self._domain_id),
            "userId": int(self._user_id),
            "visitorId": self._visitor_id
        }

        # A previous call cleared the session headers (see below); restore the API headers.
        if "authorization" not in self.session.headers:
            self.session.headers["authorization"] = f"Bearer {self._jwt}"
            self.session.headers["x-version"] = self.API_VERSION
            self.session.headers["user-agent"] = self.USER_AGENT

        r = self.session.post(
            self.config["endpoints"]["plays"],
            json=play_payload,
        )
        r.raise_for_status()
        play_data = r.json()

        manifest_url = None
        for manifest in play_data.get("manifests", []):
            if manifest.get("manifestType") != "dash":
                continue

            manifest_relative_url = manifest["url"]
            if manifest_relative_url.startswith("/"):
                manifest_url = f"https://kanopy.com{manifest_relative_url}"
            else:
                manifest_url = manifest_relative_url

            drm_type = manifest.get("drmType")

            if drm_type == "kanopyDrm":
                play_id = play_data.get("playId")
                self.widevine_license_url = self.config["endpoints"]["widevine_license"].format(license_id=f"{play_id}-0")
            elif drm_type == "studioDrm":
                license_id = manifest.get("drmLicenseID", f"{play_data.get('playId')}-1")
                self.widevine_license_url = self.config["endpoints"]["widevine_license"].format(license_id=license_id)
            else:
                self.log.warning(f"Unknown drmType: {drm_type}")
                self.widevine_license_url = None
            # Only the first DASH manifest is considered.
            break

        if not manifest_url:
            raise ValueError("Could not find a DASH manifest for this title.")
        if not self.widevine_license_url:
            raise ValueError("Could not construct Widevine license URL.")

        self.log.info(f"Fetching DASH manifest from: {manifest_url}")
        r = self.session.get(manifest_url)
        r.raise_for_status()

        # From here on the session sends only these minimal headers with the
        # Widevine user agent; the API headers are restored at the top of the
        # next get_tracks() call.
        self.session.headers.clear()
        self.session.headers.update({
            "User-Agent": self.WIDEVINE_UA,
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
        })

        tracks = DASH.from_text(r.text, url=manifest_url).to_tracks(language=title.language)

        for caption_data in play_data.get("captions", []):
            lang_code = caption_data.get("language", "en")
            # Use the first WebVTT file of each caption entry.
            for file_info in caption_data.get("files", []):
                if file_info.get("type") == "webvtt":
                    tracks.add(Subtitle(
                        id_=f"caption-{lang_code}",
                        url=file_info["url"],
                        codec=Subtitle.Codec.WebVTT,
                        language=Language.get(lang_code)
                    ))
                    break

        return tracks

    def get_widevine_license(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
        """
        Post the Widevine challenge to the license URL chosen by get_tracks().

        Returns:
            The raw license response body.

        Raises:
            ValueError: If get_tracks() has not set a license URL yet.
        """
        if not self.widevine_license_url:
            raise ValueError("Widevine license URL was not set. Call get_tracks first.")

        # Sent explicitly because get_tracks() strips the API headers from the session.
        license_headers = {
            "Content-Type": "application/octet-stream",
            "User-Agent": self.WIDEVINE_UA,
            "Authorization": f"Bearer {self._jwt}",
            "X-Version": self.API_VERSION
        }

        r = self.session.post(
            self.widevine_license_url,
            data=challenge,
            headers=license_headers
        )
        r.raise_for_status()
        return r.content

    # NOTE(review): search was already commented out in the original; the
    # reason is not visible here. Kept verbatim for reference.
    #
    # def search(self) -> List[SearchResult]:
    #     if not hasattr(self, 'search_query'):
    #         self.log.error("Search query not set. Cannot search.")
    #         return []
    #
    #     self.log.info(f"Searching for '{self.search_query}'...")
    #     params = {
    #         "query": self.search_query,
    #         "sort": "relevance",
    #         "domainId": self._domain_id,
    #         "page": 0,
    #         "perPage": 20
    #     }
    #     r = self.session.get(self.config["endpoints"]["search"], params=params)
    #     r.raise_for_status()
    #     search_data = r.json()
    #
    #     results = []
    #     for item in search_data.get("list", []):
    #         item_type = item.get("type")
    #         if item_type not in ["playlist", "video"]:
    #             continue
    #
    #         video_id = item.get("videoId")
    #         title = item.get("title", "No Title")
    #         label = "Series" if item_type == "playlist" else "Movie"
    #
    #         results.append(
    #             SearchResult(
    #                 id_=str(video_id),
    #                 title=title,
    #                 description="",
    #                 label=label,
    #                 url=f"https://www.kanopy.com/watch/{video_id}"
    #             )
    #         )
    #     return results

    def get_chapters(self, title: Title_T) -> list:
        """Return an empty list; this service does not produce chapters."""
        return []