
from __future__ import annotations
import base64
import hashlib
import json
import re
import tempfile
import warnings
from collections.abc import Generator
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path
from typing import Any, Union
import click
from bs4 import XMLParsedAsHTMLWarning
from click import Context
from devine.core.manifests import DASH, HLS
from devine.core.search_result import SearchResult
from devine.core.service import Service
from devine.core.titles import Episode, Movie, Movies, Series
from devine.core.tracks import Audio, Chapters, Subtitle, Tracks, Video
from devine.core.utils.collections import as_list
from devine.core.utils.sslciphers import SSLCiphers
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
class iP(Service):
"""
\b
Service code for the BBC iPlayer streaming service (https://www.bbc.co.uk/iplayer).
\b
Version: 1.0.1
Author: stabbedbybrick
Authorization: None
Security: None
\b
Tips:
- Use full title URL as input for best results.
    - Use --list-titles before anything; iPlayer's listings are often inconsistent.
    - Use --range HLG to request H.265 UHD tracks.
- See which titles are available in UHD:
https://www.bbc.co.uk/iplayer/help/questions/programme-availability/uhd-content
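    \b
    Example (using devine's dl command; the URL is a placeholder):
        devine dl iP https://www.bbc.co.uk/iplayer/episode/<pid>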
"""
ALIASES = ("bbciplayer", "bbc", "iplayer")
GEOFENCE = ("gb",)
TITLE_RE = r"^(?:https?://(?:www\.)?bbc\.co\.uk/(?:iplayer/(?P<kind>episode|episodes)/|programmes/))?(?P<id>[a-z0-9]+)(?:/.*)?$"
@staticmethod
@click.command(name="iP", short_help="https://www.bbc.co.uk/iplayer", help=__doc__)
@click.argument("title", type=str)
@click.pass_context
def cli(ctx: Context, **kwargs: Any) -> "iP":
return iP(ctx, **kwargs)
def __init__(self, ctx: Context, title: str):
super().__init__(ctx)
self.title = title
self.vcodec = ctx.parent.params.get("vcodec")
self.range = ctx.parent.params.get("range_")
self.session.headers.update({"user-agent": "BBCiPlayer/5.17.2.32046"})
if self.range and self.range[0].name == "HLG":
if not self.config.get("certificate"):
raise CertificateMissingError("HLG/H.265 tracks cannot be requested without a TLS certificate.")
self.session.headers.update({"user-agent": self.config["user_agent"]})
self.vcodec = "H.265"
def search(self) -> Generator[SearchResult, None, None]:
r = self.session.get(self.config["endpoints"]["search"], params={"q": self.title})
r.raise_for_status()
results = r.json().get("new_search", {}).get("results", [])
for result in results:
programme_type = result.get("type", "unknown")
category = result.get("labels", {}).get("category", "")
path = "episode" if programme_type == "episode" else "episodes"
yield SearchResult(
id_=result.get("id"),
title=result.get("title"),
description=result.get("synopses", {}).get("small"),
label=f"{programme_type} - {category}",
url=f"https://www.bbc.co.uk/iplayer/{path}/{result.get('id')}",
)
def get_titles(self) -> Union[Movies, Series]:
match = re.match(self.TITLE_RE, self.title)
if not match:
raise ValueError("Could not parse ID from title - is the URL/ID format correct?")
groups = match.groupdict()
pid = groups.get("id")
kind = groups.get("kind")
# Attempt to get brand/series data first
data = self.get_data(pid, slice_id=None)
        # Handle the case where the input is a direct episode URL and get_data fails
        if data is None:
            if kind == "episode":
                episode = self.fetch_episode(pid)
                if episode:
                    return Series([episode])
            raise MetadataError(f"Metadata not found for '{pid}'. If it's an episode, use the full URL.")
# If it's a "series" with only one item, it might be a movie.
if data.get("count", 0) < 2:
r = self.session.get(self.config["endpoints"]["episodes"].format(pid=pid))
r.raise_for_status()
episodes_data = r.json()
if not episodes_data.get("episodes"):
raise MetadataError(f"Episode metadata not found for '{pid}'.")
movie_data = episodes_data["episodes"][0]
return Movies(
[
Movie(
id_=movie_data.get("id"),
name=movie_data.get("title"),
year=(movie_data.get("release_date_time", "") or "").split("-")[0],
service=self.__class__,
language="en",
data=data,
)
]
)
# It's a full series
        seasons = [self.get_data(pid, x["id"]) for x in data.get("slices") or [{"id": None}]]
        episode_ids = [
            episode.get("episode", {}).get("id")
            for season in seasons
            if season
            for episode in season.get("entities", {}).get("results", [])
            if not episode.get("episode", {}).get("live")
            and episode.get("episode", {}).get("id")
        ]
episodes = self.get_episodes(episode_ids)
return Series(episodes)
def get_tracks(self, title: Union[Movie, Episode]) -> Tracks:
versions = self._get_available_versions(title.id)
if not versions:
raise NoStreamsAvailableError("No available versions for this title were found.")
connections = [self.check_all_versions(version["pid"]) for version in versions]
connections = [c for c in connections if c]
if not connections:
if self.vcodec == "H.265":
raise NoStreamsAvailableError("Selection unavailable in UHD.")
raise NoStreamsAvailableError("Selection unavailable. Title may be missing or geo-blocked.")
media = self._select_best_media(connections)
if not media:
raise NoStreamsAvailableError("Could not find a suitable media stream.")
tracks = self._select_tracks(media, title.language)
return tracks
def get_chapters(self, title: Union[Movie, Episode]) -> Chapters:
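        # iPlayer does not appear to expose chapter markers, so return an empty Chapters container.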
return Chapters()
def _get_available_versions(self, pid: str) -> list[dict]:
"""Fetch all available versions for a programme ID."""
r = self.session.get(url=self.config["endpoints"]["playlist"].format(pid=pid))
r.raise_for_status()
playlist = r.json()
versions = playlist.get("allAvailableVersions")
if versions:
return versions
# Fallback to scraping webpage if API returns no versions
self.log.info("No versions in playlist API, falling back to webpage scrape.")
r = self.session.get(self.config["base_url"].format(type="episode", pid=pid))
r.raise_for_status()
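        # The page embeds its state as <script>window.__IPLAYER_REDUX_STATE__ = {...};</script>,
        # which the regex below extracts.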
match = re.search(r"window\.__IPLAYER_REDUX_STATE__\s*=\s*(.*?);\s*</script>", r.text)
if match:
redux_data = json.loads(match.group(1))
# Filter out audio-described versions
return [
{"pid": v.get("id")}
for v in redux_data.get("versions", {}).values()
if v.get("kind") != "audio-described" and v.get("id")
]
return []
    def _select_best_media(self, connections: list[list[dict]]) -> list[dict] | None:
"""Selects the media group corresponding to the highest available video quality."""
heights = sorted(
{
int(c["height"])
for media_list in connections
for c in media_list
if c.get("height", "").isdigit()
},
reverse=True,
)
if not heights:
self.log.warning("No video streams with height information were found.")
# Fallback: return the first available media group if any exist.
return connections[0] if connections else None
highest_height = heights[0]
self.log.debug(f"Available resolutions (p): {heights}. Selecting highest: {highest_height}p.")
best_media_list = next(
(
media_list
for media_list in connections
if any(conn.get("height") == str(highest_height) for conn in media_list)
),
None, # Default to None if no matching group is found (should be impossible if heights is not empty)
)
return best_media_list
    def _select_tracks(self, media: list[dict], lang: str) -> Tracks:
        video_stream_info = next((m for m in media if m.get("kind") == "video"), None)
        if video_stream_info is None:
            raise NoStreamsAvailableError("No video streams were found in the media set.")
        connections = sorted(video_stream_info["connection"], key=lambda x: x.get("priority", 99))
        if self.vcodec == "H.265":
            connection = connections[0]
        else:
            connection = next(
                (c for c in connections if c["supplier"] == "mf_akamai" and c["transferFormat"] == "dash"),
                None,
            )
        if connection is None:
            raise NoStreamsAvailableError("No suitable 'mf_akamai' DASH connection was found.")
        if self.vcodec != "H.265":
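            # Normalize the chosen connection to an HLS master playlist URL:
            # strip the query string and manifest filename, then append "hls/master.m3u8".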
if connection["transferFormat"] == "dash":
connection["href"] = "/".join(
connection["href"]
.replace("dash", "hls")
.split("?")[0]
.split("/")[0:-1]
+ ["hls", "master.m3u8"]
)
connection["transferFormat"] = "hls"
elif connection["transferFormat"] == "hls":
connection["href"] = "/".join(
connection["href"]
.replace(".hlsv2.ism", "")
.split("?")[0]
.split("/")[0:-1]
+ ["hls", "master.m3u8"]
)
if connection["transferFormat"] == "dash":
tracks = DASH.from_url(url=connection["href"], session=self.session).to_tracks(language=lang)
elif connection["transferFormat"] == "hls":
tracks = HLS.from_url(url=connection["href"], session=self.session).to_tracks(language=lang)
else:
raise ValueError(f"Unsupported transfer format: {connection['transferFormat']}")
for video in tracks.videos:
# UHD DASH manifest has no range information, so we add it manually
if video.codec == Video.Codec.HEVC:
video.range = Video.Range.HLG
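            # Some HLS renditions mux a fixed audio variant into the video URL
            # (e.g. an "-audio_eng=128000" segment); split it into a separate Audio
            # track so it can be selected independently of the video bitrate.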
if any(re.search(r"-audio_\w+=\d+", x) for x in as_list(video.url)):
# create audio stream from the video stream
audio_url = re.sub(r"-video=\d+", "", as_list(video.url)[0])
audio = Audio(
                    # use audio_url rather than the video URL so the video bitrate doesn't affect the ID
id_=hashlib.md5(audio_url.encode()).hexdigest()[0:7],
url=audio_url,
codec=Audio.Codec.from_codecs(video.data["hls"]["playlist"].stream_info.codecs),
language=video.data["hls"]["playlist"].media[0].language,
bitrate=int(self.find(r"-audio_\w+=(\d+)", as_list(video.url)[0]) or 0),
channels=video.data["hls"]["playlist"].media[0].channels,
descriptive=False, # Not available
descriptor=Audio.Descriptor.HLS,
drm=video.drm,
data=video.data,
)
if not tracks.exists(by_id=audio.id):
# some video streams use the same audio, so natural dupes exist
tracks.add(audio)
# remove audio from the video stream
                video.url = re.sub(r"-audio_\w+=\d+", "", as_list(video.url)[0])
video.codec = Video.Codec.from_codecs(video.data["hls"]["playlist"].stream_info.codecs)
video.bitrate = int(self.find(r"-video=(\d+)", as_list(video.url)[0]) or 0)
        for caption in (x for x in media if x.get("kind") == "captions"):
            connection = sorted(caption["connection"], key=lambda x: x.get("priority", 99))[0]
tracks.add(
Subtitle(
id_=hashlib.md5(connection["href"].encode()).hexdigest()[0:6],
url=connection["href"],
codec=Subtitle.Codec.from_codecs("ttml"),
language=lang,
is_original_lang=True,
forced=False,
sdh=True,
)
)
break
return tracks
    def get_data(self, pid: str, slice_id: str | None) -> dict | None:
"""Fetches programme metadata from the GraphQL-like endpoint."""
json_data = {
"id": "9fd1636abe711717c2baf00cebb668de",
"variables": {"id": pid, "perPage": 200, "page": 1, "sliceId": slice_id},
}
r = self.session.post(self.config["endpoints"]["metadata"], json=json_data)
r.raise_for_status()
return r.json().get("data", {}).get("programme")
    def check_all_versions(self, vpid: str) -> list | None:
        """Checks media availability for a given version PID, trying multiple mediators."""
        session = self.session
        cert_path = None
        params = {}
        if self.vcodec == "H.265":
            if not self.config.get("certificate"):
                raise CertificateMissingError("TLS certificate not configured.")
            session.mount("https://", SSLCiphers())
            endpoint_template = self.config["endpoints"]["secure"]
            mediators = ["securegate.iplayer.bbc.co.uk", "ipsecure.stage.bbc.co.uk"]
            mediaset = "iptv-uhd"
            cert_binary = base64.b64decode(self.config["certificate"])
            with tempfile.NamedTemporaryFile(mode="w+b", delete=False, suffix=".pem") as cert_file:
                cert_file.write(cert_binary)
                cert_path = cert_file.name
            params["cert"] = cert_path
        else:
            endpoint_template = self.config["endpoints"]["open"]
            mediators = ["open.live.bbc.co.uk", "open.stage.bbc.co.uk"]
            mediaset = "iptv-all"
        try:
            for mediator in mediators:
                if self.vcodec == "H.265":
                    url = endpoint_template.format(mediator, vpid, mediaset)
                else:
                    url = endpoint_template.format(mediator, mediaset, vpid)
                try:
                    r = session.get(url, **params)
                    r.raise_for_status()
                    availability = r.json()
                    if availability.get("media"):
                        return availability["media"]
                    if availability.get("result"):
                        self.log.warning(
                            f"Mediator '{mediator}' reported an error: {availability['result']}"
                        )
                except Exception as e:
                    self.log.debug(f"Failed to check mediator '{mediator}': {e}")
        finally:
            # Only delete the temporary certificate after all mediators have been
            # tried; deleting it inside the loop would break subsequent attempts.
            if cert_path is not None:
                Path(cert_path).unlink(missing_ok=True)
        return None
    def fetch_episode(self, pid: str) -> Episode | None:
"""Fetches and parses data for a single episode."""
r = self.session.get(self.config["endpoints"]["episodes"].format(pid=pid))
r.raise_for_status()
data = r.json()
if not data.get("episodes"):
return None
episode_data = data["episodes"][0]
subtitle = episode_data.get("subtitle", "")
year = (episode_data.get("release_date_time", "") or "").split("-")[0]
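        # The "subtitle" field typically looks like "Series 3: 2. Episode Name" or
        # "2008/09: Episode 5" (illustrative shapes the regexes below target).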
series_match = next(re.finditer(r"Series (\d+).*?:|Season (\d+).*?:|(\d{4}/\d{2}): Episode \d+", subtitle), None)
season_num = 0
if series_match:
season_str = next(g for g in series_match.groups() if g is not None)
season_num = int(season_str.replace("/", ""))
elif not data.get("slices"): # Fallback for single-season shows
season_num = 1
num_match = next(re.finditer(r"(\d+)\.|Episode (\d+)", subtitle), None)
number = 0
if num_match:
number = int(next(g for g in num_match.groups() if g is not None))
else:
number = episode_data.get("numeric_tleo_position", 0)
name_match = re.search(r"\d+\. (.+)", subtitle)
name = ""
if name_match:
name = name_match.group(1)
elif not re.search(r"Series \d+: Episode \d+", subtitle):
name = subtitle
return Episode(
id_=episode_data.get("id"),
service=self.__class__,
title=episode_data.get("title"),
season=season_num,
number=number,
name=name,
language="en",
year=year,
)
def get_episodes(self, episode_ids: list) -> list[Episode]:
"""Fetches multiple episodes concurrently."""
with ThreadPoolExecutor(max_workers=10) as executor:
tasks = executor.map(self.fetch_episode, episode_ids)
return [task for task in tasks if task is not None]
    def find(self, pattern: str, string: str, group: int | str | None = None) -> str | None:
        """Return the first match of a pattern in a string (or a specific group), else None."""
        if group:
            m = re.search(pattern, string)
            return m.group(group) if m else None
        return next(iter(re.findall(pattern, string)), None)
class iPlayerError(Exception):
"""Base exception for this service."""
pass
class CertificateMissingError(iPlayerError):
"""Raised when an TLS certificate is required but not provided."""
pass
class NoStreamsAvailableError(iPlayerError):
"""Raised when no playable streams are found for a title."""
pass
class MetadataError(iPlayerError):
"""Raised when metadata for a title cannot be found."""
pass