♻️ (DROP): refactor to use HLS for media extraction and improve code readability

This commit is contained in:
Sp4rk.y 2024-09-06 21:09:53 -06:00
parent dfb3bece52
commit 59fbc354bd

View File

@ -1,15 +1,15 @@
import re
import json
import click
from typing import Optional, Union
from http.cookiejar import CookieJar
import json
from bs4 import BeautifulSoup
import click
from devine.core.service import Service
from devine.core.titles import Episode, Series
from devine.core.tracks import Tracks, Subtitle, Video, Audio
from devine.core.manifests import DASH
from devine.core.credential import Credential
from devine.core.manifests import HLS
class DROP(Service):
@ -22,7 +22,9 @@ class DROP(Service):
# Updated regex to capture anything between / and /season or end of the URL
TITLE_RE = r"^(?:https?://(?:www\.)?dropout\.tv/)([^/]+)(?:/.*)?$"
SERIES_RE = r"https?://(?:www\.)?dropout\.tv/([^/]+)(?:/season:(\d+))?/?$"
EPISODE_RE = r"https?://(?:www\.)?dropout\.tv/([^/]+)/season:(\d+)/videos/([^/]+)/?$"
EPISODE_RE = (
r"https?://(?:www\.)?dropout\.tv/([^/]+)/season:(\d+)/videos/([^/]+)/?$"
)
LOGIN_URL = "https://www.dropout.tv/login"
@ -37,7 +39,11 @@ class DROP(Service):
self.title = title
super().__init__(ctx)
def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
def authenticate(
self,
cookies: Optional[CookieJar] = None,
credential: Optional[Credential] = None,
) -> None:
self.credentials = credential
if cookies:
@ -50,7 +56,9 @@ class DROP(Service):
"utf8": "true",
}
response = self.session.post(self.LOGIN_URL, data=login_data, allow_redirects=False)
response = self.session.post(
self.LOGIN_URL, data=login_data, allow_redirects=False
)
if '<div id="watch-unauthorized"' in response.text:
self.log.error("Login failed")
@ -58,7 +66,9 @@ class DROP(Service):
else:
self.log.info("Login successful")
else:
self.log.info("No login credentials provided, proceeding without authentication")
self.log.info(
"No login credentials provided, proceeding without authentication"
)
def _get_authenticity_token(self):
signin_page = self.session.get(self.LOGIN_URL).text
@ -99,14 +109,22 @@ class DROP(Service):
episode_link = item.find("a", class_="browse-item-link")
if episode_link:
episode_url = episode_link["href"]
episode_data = json.loads(episode_link["data-track-event-properties"])
episode_data = json.loads(
episode_link["data-track-event-properties"]
)
episode_id = episode_data["id"]
episode_title = episode_data["label"]
episode_number_elem = item.find("span", class_="media-identifier media-episode")
episode_number_elem = item.find(
"span", class_="media-identifier media-episode"
)
episode_number = (
int(re.search(r"Episode (\d+)", episode_number_elem.text).group(1))
int(
re.search(r"Episode (\d+)", episode_number_elem.text).group(
1
)
)
if episode_number_elem
else None
)
@ -140,70 +158,109 @@ class DROP(Service):
embed_url = embed_url_match.group(1)
embed_url = embed_url.replace("&amp;", "&") # Fix HTML entities
# Fetch the embed page
embed_page = self.session.get(embed_url).text
# Prepare headers for the embed page request
headers = {
"Referer": episode_url,
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.5",
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "iframe",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "cross-site",
}
# Extract the playlist URL
playlist_url_match = re.search(
r'"(https://vod-adaptive-ak\.vimeocdn\.com/[^"]+playlist\.json[^"]+)"', embed_page
# Fetch the embed page with headers
embed_page = self.session.get(embed_url, headers=headers).text
# Extract the config_url
config_url_match = re.search(r'config_url":"([^"]+)"', embed_page)
if not config_url_match:
raise ValueError("Could not find config_url in the embed page")
config_url = config_url_match.group(1).replace("\\u0026", "&")
# Fetch the config data
config_data = self.session.get(config_url, headers=headers).json()
# Get the HLS playlist URL
hls_url = config_data["request"]["files"]["hls"]["cdns"][
"akfire_interconnect_quic"
]["url"]
# Fetch and parse the HLS playlist
hls_tracks = HLS.from_url(url=hls_url, session=self.session).to_tracks(
language="en"
)
if not playlist_url_match:
raise ValueError("Could not find playlist URL in the embed page")
playlist_url = playlist_url_match.group(1)
# Fetch and parse the playlist JSON
playlist_data = self.session.get(playlist_url).json()
tracks = Tracks()
# Process video tracks
for video_file in playlist_data.get("video", []):
# Handle multiple video tracks
for video in hls_tracks.videos:
tracks.add(
Video(
id_=video_file["id"],
url=video_file["base_url"] + video_file["init_segment"],
codec=video_file["codecs"],
language="en", # Assuming English as default
bitrate=video_file.get("bitrate"),
width=video_file.get("width"),
height=video_file.get("height"),
fps=video_file.get("framerate"),
id_=f"video_{video.id}",
url=video.url,
codec=video.codec,
language=video.language,
bitrate=video.bitrate,
width=video.width,
height=video.height,
fps=video.fps,
)
)
# Process audio tracks
for audio_file in playlist_data.get("audio", []):
# Handle multiple audio tracks
for audio in hls_tracks.audio:
tracks.add(
Audio(
id_=audio_file["id"],
url=audio_file["base_url"] + audio_file["init_segment"],
codec=audio_file["codecs"],
language=audio_file.get("language", "en"), # Assuming English as default if not specified
bitrate=audio_file.get("bitrate"),
id_=f"audio_{audio.id}",
url=audio.url,
codec=audio.codec,
language=audio.language,
bitrate=audio.bitrate,
)
)
# Process subtitles
for text_track in playlist_data.get("text_tracks", []):
if text_track["kind"] == "captions":
# Handle subtitles (if any)
for subtitle in hls_tracks.subtitles:
tracks.add(
Subtitle(
id_=text_track["id"],
url=text_track["url"],
codec=Subtitle.Codec.VTT,
language=text_track["language"],
is_original_lang=text_track.get("lang") == playlist_data.get("default_language"),
id_=f"subtitle_{subtitle.id}",
url=subtitle.url,
codec=subtitle.codec,
language=subtitle.language,
)
)
return tracks
return Tracks
def get_chapters(self, title):
    """Return the chapter list for ``title``.

    Chapter extraction is not implemented: ``title`` is ignored and a new,
    empty list is returned on every call.
    """
    # Placeholder until DROPOUT.tv is known to provide chapter information.
    no_chapters = []
    return no_chapters
def get_widevine_license(self, challenge: bytes, title: Union[Episode], track):
    """Widevine license hook; currently an unimplemented stub.

    None of ``challenge``, ``title`` or ``track`` is used, no request is
    made, and the call always returns ``None``.
    """
    # TODO: implement the license fetch. This presumably means sending
    # ``challenge`` to a license server -- the endpoint is not known here.
    return None
def map_video_codec(self, codec_string):
    """Map a codec identifier string to a ``Video.Codec`` member.

    The match is by prefix, so a full codecs string with profile/level
    suffixes (for example ``"avc1.640028"`` or ``"hvc1.2.4.L120.B0"``) is
    accepted as well as a bare codec name.

    Args:
        codec_string: Codec identifier to classify.

    Returns:
        The matching ``Video.Codec`` member, or ``None`` when no known
        prefix matches.
    """
    # Each entry pairs every accepted prefix with the codec it denotes.
    # Codecs strings built from ISO BMFF sample-entry codes (RFC 6381) use
    # "hvc1"/"hev1", "vp09" and "av01" rather than the short names "hevc",
    # "vp9" and "av1"; the short names are kept so previously recognised
    # inputs still map to the same codec.
    codec_prefixes = (
        (("avc1", "avc3"), Video.Codec.AVC),
        (("hevc", "hvc1", "hev1"), Video.Codec.HEVC),
        (("vp9", "vp09"), Video.Codec.VP9),
        (("av1", "av01"), Video.Codec.AV1),
    )
    for prefixes, codec in codec_prefixes:
        # str.startswith accepts a tuple and matches any of its members.
        if codec_string.startswith(prefixes):
            return codec
    return None
def map_audio_codec(self, codec_string):
    """Map a codec identifier string to an ``Audio.Codec`` member.

    The match is by prefix, tried in the order listed below; the first
    prefix that ``codec_string`` starts with wins.

    Args:
        codec_string: Codec identifier to classify (e.g. ``"mp4a.40.2"``).

    Returns:
        The matching ``Audio.Codec`` member, or ``None`` when no prefix
        matches.
    """
    prefix_table = (
        ("mp4a", Audio.Codec.AAC),
        ("ec-3", Audio.Codec.EC3),
        ("ac-3", Audio.Codec.AC3),
        ("opus", Audio.Codec.OPUS),
    )
    # next() with a default yields the first hit, or None when nothing matches.
    return next(
        (
            codec
            for prefix, codec in prefix_table
            if codec_string.startswith(prefix)
        ),
        None,
    )