# unshackle-services/KNPY/__init__.py

import base64
import json
import re
from datetime import datetime, timezone
from http.cookiejar import CookieJar
from typing import List, Optional

import click
import jwt
from langcodes import Language

from unshackle.core.constants import AnyTrack
from unshackle.core.credential import Credential
from unshackle.core.manifests import DASH
from unshackle.core.search_result import SearchResult
from unshackle.core.service import Service
from unshackle.core.titles import Episode, Movie, Movies, Series, Title_T, Titles_T
from unshackle.core.tracks import Subtitle, Tracks


class KNPY(Service):
    """
    Service code for Kanopy (kanopy.com).
    Version: 1.0.0

    Auth: Credential (username + password)
    Security: FHD@L3

    Handles both Movies and Series (Playlists).
    Detects and stops for movies that require tickets.
    Caching included
    """

    # Matches Kanopy title URLs (optional "www." host prefix, at least one path
    # segment) whose final path segment is the numeric content ID.
    TITLE_RE = r"^https?://(?:www\.)?kanopy\.com/.+/(?P<id>\d+)$"
    GEOFENCE = ()
    NO_SUBTITLES = False

    # Click entry point for this service: registers a "KNPY" command that takes
    # a single positional "title" argument and receives the click context.
    # Documented with comments rather than a docstring so the function's
    # __doc__ (which click may surface as help text) is left unchanged.
    @staticmethod
    @click.command(name="KNPY", short_help="https://kanopy.com")
    @click.argument("title", type=str)
    @click.pass_context
    def cli(ctx, **kwargs):
        # kwargs carries the parsed command arguments (here: title) straight
        # into the service constructor.
        return KNPY(ctx, **kwargs)

    def __init__(self, ctx, title: str):
        """
        Set up the service for one title.

        Parameters:
            ctx: Click context; ``ctx.obj.cdm`` is stored on the instance.
            title: Either a Kanopy URL matching ``TITLE_RE`` (its numeric ID
                becomes ``content_id``) or any other text, which is kept as
                ``search_query`` while ``content_id`` is left as ``None``.

        Raises:
            ValueError: If the service configuration is empty or missing.
        """
        super().__init__(ctx)
        if not self.config:
            raise ValueError("KNPY configuration not found. Ensure config.yaml exists.")

        self.cdm = ctx.obj.cdm

        url_match = re.match(self.TITLE_RE, title)
        self.content_id = url_match.group("id") if url_match else None
        if not url_match:
            # search_query is only defined when the input was not a title URL.
            self.search_query = title

        self.API_VERSION = self.config["client"]["api_version"]
        self.USER_AGENT = self.config["client"]["user_agent"]
        self.WIDEVINE_UA = self.config["client"]["widevine_ua"]

        # Default headers sent with every API request on this session.
        api_headers = {
            "x-version": self.API_VERSION,
            "user-agent": self.USER_AGENT,
        }
        self.session.headers.update(api_headers)

        # Populated later by authenticate() / get_tracks().
        self._jwt = None
        self._visitor_id = None
        self._user_id = None
        self._domain_id = None
        self.widevine_license_url = None

    def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
        """
        Log in to Kanopy and attach the bearer token to the session.

        A cached token is reused when it has not expired and was stored for the
        same username. Otherwise a visitor handshake followed by an
        email/password login is performed and the new token is cached.

        Parameters:
            cookies: Not used by this service.
            credential: Must carry both a username (email) and a password.

        Raises:
            ValueError: If no usable credential is supplied, or (via
                ``_fetch_user_details``) if the user has no library membership.
            Errors raised by ``raise_for_status()`` on the handshake, login or
            membership requests propagate unchanged.
        """
        if not credential or not credential.username or not credential.password:
            raise ValueError("Kanopy requires email and password for authentication.")

        cache = self.cache.get("auth_token")

        if cache and not cache.expired:
            cached_data = cache.data
            valid_token = None

            if isinstance(cached_data, dict) and "token" in cached_data:
                if cached_data.get("username") == credential.username:
                    valid_token = cached_data["token"]
                    self.log.info("Using cached authentication token")
                else:
                    self.log.info(f"Cached token belongs to '{cached_data.get('username')}', but logging in as '{credential.username}'. Re-authenticating.")

            elif isinstance(cached_data, str):
                self.log.info("Found legacy cached token format. Re-authenticating to ensure correct user.")

            if valid_token:
                self._jwt = valid_token
                self.session.headers.update({"authorization": f"Bearer {self._jwt}"})

                if self._user_id and self._domain_id and self._visitor_id:
                    # Everything needed is already known; the cached token is
                    # sufficient and a fresh login would be wasted work.
                    return

                try:
                    claims = jwt.decode(self._jwt, options={"verify_signature": False})["data"]
                    user_id = claims["uid"]
                    visitor_id = claims["visitor_id"]
                except (KeyError, TypeError, jwt.DecodeError) as e:
                    self.log.error(f"Could not decode cached token: {e}. Re-authenticating.")
                    # Drop the unusable token so it is not sent along with the
                    # handshake request below.
                    self._jwt = None
                    self.session.headers.pop("authorization", None)
                else:
                    self._user_id = user_id
                    self._visitor_id = visitor_id
                    self.log.info("Extracted user_id and visitor_id from cached token.")
                    self._fetch_user_details()
                    return

        self.log.info("Performing handshake to get visitor token...")
        r = self.session.get(self.config["endpoints"]["handshake"])
        r.raise_for_status()
        handshake_data = r.json()
        self._visitor_id = handshake_data["visitorId"]
        initial_jwt = handshake_data["jwt"]

        self.log.info(f"Logging in as {credential.username}...")
        login_payload = {
            "credentialType": "email",
            "emailUser": {
                "email": credential.username,
                "password": credential.password
            }
        }
        # The login call is authorised with the visitor token from the handshake.
        r = self.session.post(
            self.config["endpoints"]["login"],
            json=login_payload,
            headers={"authorization": f"Bearer {initial_jwt}"}
        )
        r.raise_for_status()
        login_data = r.json()
        self._jwt = login_data["jwt"]
        self._user_id = login_data["userId"]

        self.session.headers.update({"authorization": f"Bearer {self._jwt}"})
        self.log.info(f"Successfully authenticated as {credential.username}")

        self._fetch_user_details()

        # Cache the token together with the username it belongs to, so a later
        # run with a different credential does not reuse it.
        cache_payload = {
            "token": self._jwt,
            "username": credential.username
        }
        fallback_ttl = 3600  # seconds

        # Only the decoding/arithmetic is guarded; a failure of cache.set itself
        # is not masked (retrying the same call would not help).
        try:
            exp_timestamp = jwt.decode(self._jwt, options={"verify_signature": False}).get("exp")
            if exp_timestamp:
                expiration_in_seconds = int(exp_timestamp - datetime.now(timezone.utc).timestamp())
            else:
                expiration_in_seconds = None
        except (jwt.InvalidTokenError, AttributeError, TypeError, ValueError) as e:
            self.log.error(f"Failed to decode JWT for caching: {e}. Caching for 1 hour as a fallback.")
            cache.set(data=cache_payload, expiration=fallback_ttl)
            return

        if expiration_in_seconds is None:
            self.log.warning("JWT has no 'exp' claim, caching for 1 hour as a fallback.")
            cache.set(data=cache_payload, expiration=fallback_ttl)
        elif expiration_in_seconds <= 0:
            # A non-positive lifetime is not a meaningful cache duration.
            self.log.warning("JWT 'exp' claim is not in the future; token will not be cached.")
        else:
            self.log.info(f"Caching token for {expiration_in_seconds / 60:.2f} minutes.")
            cache.set(data=cache_payload, expiration=expiration_in_seconds)

    def _fetch_user_details(self):
        """
        Fetch the user's library memberships and select the domain to use.

        Selection order:
            1. the active membership flagged as default,
            2. otherwise the first active membership,
            3. otherwise the first membership listed (logged as a warning).

        The chosen domain ID is stored as a string in ``self._domain_id``.

        Raises:
            ValueError: If the membership list is empty.
        """
        self.log.info("Fetching user library memberships...")
        r = self.session.get(self.config["endpoints"]["memberships"].format(user_id=self._user_id))
        r.raise_for_status()
        # `or []` also covers a response whose "list" value is null.
        memberships = r.json().get("list") or []

        if not memberships:
            raise ValueError("No active library memberships found for this user.")

        active = [m for m in memberships if m.get("status") == "active"]

        default = next((m for m in active if m.get("isDefault", False)), None)
        if default is not None:
            self._domain_id = str(default["domainId"])
            self.log.info(f"Using default library domain: {default.get('sitename', 'Unknown')} (ID: {self._domain_id})")
            return

        if active:
            self._domain_id = str(active[0]["domainId"])
            self.log.warning(f"No default library found. Using first active domain: {self._domain_id}")
            return

        # Nothing is marked active. Keep the previous behaviour of falling back
        # to the first entry, but say so accurately in the log.
        self._domain_id = str(memberships[0]["domainId"])
        self.log.warning(f"No active library membership found. Using first listed domain: {self._domain_id}")

    def get_titles(self) -> Titles_T:
        """
        Resolve ``self.content_id`` into a movie or a series of episodes.

        A ``"video"`` response becomes a single-item ``Movies`` collection. A
        ``"playlist"`` response becomes a ``Series`` whose episodes are the
        playlist's video items.

        Season number: parsed from the playlist title ("Season 2", "Series 2",
        "S02"), defaulting to 1.
        Episode number: parsed from the episode title ("Episode 3", "Ep. 3"),
        defaulting to the item's 1-based position in the playlist.

        Raises:
            ValueError: If no content ID or domain ID is available, or the
                content type is neither ``"video"`` nor ``"playlist"``.
        """
        if not self.content_id:
            raise ValueError("A content ID is required to get titles. Use a URL or run a search first.")
        if not self._domain_id:
            raise ValueError("Domain ID not set. Authentication may have failed.")

        r = self.session.get(self.config["endpoints"]["video_info"].format(video_id=self.content_id, domain_id=self._domain_id))
        r.raise_for_status()
        content_data = r.json()

        content_type = content_data.get("type")

        def parse_lang(data):
            """Return the first listed language of *data*, or English when unknown."""
            try:
                langs = data.get("languages", [])
                if langs and isinstance(langs, list):
                    return Language.find(langs[0])
            except Exception as exc:
                # Best effort: an unrecognised language must not abort title
                # parsing. Unlike a bare `except`, this lets KeyboardInterrupt
                # and SystemExit through.
                self.log.debug(f"Could not resolve language from {data.get('languages')!r}: {exc}")
            return Language.get("en")

        if content_type == "video":
            video_data = content_data["video"]
            movie = Movie(
                id_=str(video_data["videoId"]),
                service=self.__class__,
                name=video_data["title"],
                year=video_data.get("productionYear"),
                description=video_data.get("descriptionHtml", ""),
                language=parse_lang(video_data),
                data=video_data,
            )
            return Movies([movie])

        elif content_type == "playlist":
            playlist_data = content_data["playlist"]
            series_title = playlist_data["title"]
            series_year = playlist_data.get("productionYear")

            # The leading \b stops the marker matching inside another word
            # (e.g. "Paris 1919" must not yield season 1919).
            season_match = re.search(r'\b(?:Season|Series|S)\s*(\d+)', series_title, re.IGNORECASE)
            season_num = int(season_match.group(1)) if season_match else 1

            r = self.session.get(self.config["endpoints"]["video_items"].format(video_id=self.content_id, domain_id=self._domain_id))
            r.raise_for_status()
            items_data = r.json()

            # Same word-boundary reasoning: "Step 2" is not "Ep 2".
            episode_re = re.compile(r'\bEp(?:isode)?\.?\s*(\d+)', re.IGNORECASE)

            episodes = []
            for position, item in enumerate(items_data.get("list") or [], start=1):
                if item.get("type") != "video":
                    continue

                video_data = item["video"]
                ep_title = video_data.get("title", "")

                # Fall back to the playlist position when the title carries no
                # explicit episode number.
                ep_match = episode_re.search(ep_title)
                ep_num = int(ep_match.group(1)) if ep_match else position

                episodes.append(
                    Episode(
                        id_=str(video_data["videoId"]),
                        service=self.__class__,
                        title=series_title,
                        season=season_num,
                        number=ep_num,
                        name=ep_title,
                        description=video_data.get("descriptionHtml", ""),
                        year=video_data.get("productionYear", series_year),
                        language=parse_lang(video_data),
                        data=video_data,
                    )
                )

            series = Series(episodes)
            series.name = series_title
            series.description = playlist_data.get("descriptionHtml", "")
            series.year = series_year
            return series

        else:
            raise ValueError(f"Unsupported content type: {content_type}")

    def get_tracks(self, title: Title_T) -> Tracks:
        """
        Start a play session for *title* and build its tracks.

        Posts a play request, takes the first DASH manifest from the response,
        derives the Widevine license URL from the manifest's ``drmType`` and
        stores it in ``self.widevine_license_url``. It then parses the manifest
        into tracks and adds one WebVTT subtitle per caption entry.

        Side effect: after the manifest is fetched, the session headers are
        replaced with a minimal set using the Widevine user agent. The API
        headers are restored at the start of the next call.

        Raises:
            ValueError: If the user/domain IDs are missing, no DASH manifest is
                offered, or no license URL can be built for it.
        """
        if not self._domain_id or not self._user_id:
            raise ValueError("User or domain ID not set. Authentication may have failed.")

        # Never let a license URL from a previously processed title leak into
        # this one.
        self.widevine_license_url = None

        play_payload = {
            "videoId": int(title.id),
            "domainId": int(self._domain_id),
            "userId": int(self._user_id),
            "visitorId": self._visitor_id
        }

        # A previous call cleared the session headers (see below); put the API
        # headers back before talking to the API again.
        if "authorization" not in self.session.headers:
            self.session.headers["authorization"] = f"Bearer {self._jwt}"
            self.session.headers["x-version"] = self.API_VERSION
            self.session.headers["user-agent"] = self.USER_AGENT

        r = self.session.post(
            self.config["endpoints"]["plays"],
            json=play_payload,
        )
        r.raise_for_status()
        play_data = r.json()

        license_endpoint = self.config["endpoints"]["widevine_license"]
        play_id = play_data.get("playId")

        manifest_url = None
        for manifest in play_data.get("manifests") or []:
            if manifest.get("manifestType") != "dash":
                continue

            manifest_relative_url = manifest["url"]
            if manifest_relative_url.startswith("/"):
                manifest_url = f"https://kanopy.com{manifest_relative_url}"
            else:
                manifest_url = manifest_relative_url

            drm_type = manifest.get("drmType")
            license_id = None

            if drm_type == "kanopyDrm":
                if play_id is not None:
                    license_id = f"{play_id}-0"
            elif drm_type == "studioDrm":
                # Prefer the explicit license ID; fall back to one derived from
                # the play ID only when that ID actually exists.
                license_id = manifest.get("drmLicenseID")
                if not license_id and play_id is not None:
                    license_id = f"{play_id}-1"
            else:
                self.log.warning(f"Unknown drmType: {drm_type}")

            if license_id:
                self.widevine_license_url = license_endpoint.format(license_id=license_id)
            # Only the first DASH manifest is considered.
            break

        if not manifest_url:
            raise ValueError("Could not find a DASH manifest for this title.")
        if not self.widevine_license_url:
            raise ValueError("Could not construct Widevine license URL.")

        self.log.info(f"Fetching DASH manifest from: {manifest_url}")
        r = self.session.get(manifest_url)
        r.raise_for_status()

        # From here on the session is used with the Widevine user agent and
        # without the API headers.
        self.session.headers.clear()
        self.session.headers.update({
            "User-Agent": self.WIDEVINE_UA,
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
        })

        tracks = DASH.from_text(r.text, url=manifest_url).to_tracks(language=title.language)

        for caption_data in play_data.get("captions") or []:
            # `or "en"` also covers an explicit null language.
            lang_code = caption_data.get("language") or "en"
            for file_info in caption_data.get("files") or []:
                if file_info.get("type") == "webvtt":
                    tracks.add(Subtitle(
                        id_=f"caption-{lang_code}",
                        url=file_info["url"],
                        codec=Subtitle.Codec.WebVTT,
                        language=Language.get(lang_code)
                    ))
                    # One WebVTT file per caption entry is enough.
                    break

        return tracks

    def get_widevine_license(self, *, challenge: bytes, title: Title_T, track: AnyTrack) -> bytes:
        """
        Exchange a Widevine license challenge for a license.

        Posts *challenge* to the URL stored in ``self.widevine_license_url``
        and returns the raw response body. *title* and *track* are not used.

        Raises:
            ValueError: If no license URL has been set yet.
        """
        license_url = self.widevine_license_url
        if not license_url:
            raise ValueError("Widevine license URL was not set. Call get_tracks first.")

        # Sent explicitly with this request rather than relying on the
        # session's default headers.
        request_headers = {}
        request_headers["Content-Type"] = "application/octet-stream"
        request_headers["User-Agent"] = self.WIDEVINE_UA
        request_headers["Authorization"] = f"Bearer {self._jwt}"
        request_headers["X-Version"] = self.API_VERSION

        response = self.session.post(license_url, data=challenge, headers=request_headers)
        response.raise_for_status()
        return response.content

    # def search(self) -> List[SearchResult]:
    #     if not hasattr(self, 'search_query'):
    #         self.log.error("Search query not set. Cannot search.")
    #         return []

    #     self.log.info(f"Searching for '{self.search_query}'...")
    #     params = {
    #         "query": self.search_query,
    #         "sort": "relevance",
    #         "domainId": self._domain_id,
    #         "page": 0,
    #         "perPage": 20
    #     }
    #     r = self.session.get(self.config["endpoints"]["search"], params=params)
    #     r.raise_for_status()
    #     search_data = r.json()

    #     results = []
    #     for item in search_data.get("list", []):
    #         item_type = item.get("type")
    #         if item_type not in ["playlist", "video"]:
    #             continue

    #         video_id = item.get("videoId")
    #         title = item.get("title", "No Title")
    #         label = "Series" if item_type == "playlist" else "Movie"

    #         results.append(
    #             SearchResult(
    #                 id_=str(video_id),
    #                 title=title,
    #                 description="",
    #                 label=label,
    #                 url=f"https://www.kanopy.com/watch/{video_id}"
    #             )
    #         )
    #     return results

    def get_chapters(self, title: Title_T) -> list:
        """Return the chapters for *title*; this service always returns an empty list."""
        no_chapters: list = []
        return no_chapters