♻️ (DROP): refactor to use HLS for media extraction and improve code readability
This commit is contained in:
parent
dfb3bece52
commit
59fbc354bd
@ -1,15 +1,15 @@
|
||||
import re
|
||||
import json
|
||||
import click
|
||||
from typing import Optional, Union
|
||||
from http.cookiejar import CookieJar
|
||||
import json
|
||||
from bs4 import BeautifulSoup
|
||||
import click
|
||||
|
||||
from devine.core.service import Service
|
||||
from devine.core.titles import Episode, Series
|
||||
from devine.core.tracks import Tracks, Subtitle, Video, Audio
|
||||
from devine.core.manifests import DASH
|
||||
from devine.core.credential import Credential
|
||||
from devine.core.manifests import HLS
|
||||
|
||||
|
||||
class DROP(Service):
|
||||
@ -22,7 +22,9 @@ class DROP(Service):
|
||||
# Updated regex to capture anything between / and /season or end of the URL
|
||||
TITLE_RE = r"^(?:https?://(?:www\.)?dropout\.tv/)([^/]+)(?:/.*)?$"
|
||||
SERIES_RE = r"https?://(?:www\.)?dropout\.tv/([^/]+)(?:/season:(\d+))?/?$"
|
||||
EPISODE_RE = r"https?://(?:www\.)?dropout\.tv/([^/]+)/season:(\d+)/videos/([^/]+)/?$"
|
||||
EPISODE_RE = (
|
||||
r"https?://(?:www\.)?dropout\.tv/([^/]+)/season:(\d+)/videos/([^/]+)/?$"
|
||||
)
|
||||
|
||||
LOGIN_URL = "https://www.dropout.tv/login"
|
||||
|
||||
@ -37,7 +39,11 @@ class DROP(Service):
|
||||
self.title = title
|
||||
super().__init__(ctx)
|
||||
|
||||
def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
|
||||
def authenticate(
|
||||
self,
|
||||
cookies: Optional[CookieJar] = None,
|
||||
credential: Optional[Credential] = None,
|
||||
) -> None:
|
||||
self.credentials = credential
|
||||
|
||||
if cookies:
|
||||
@ -50,7 +56,9 @@ class DROP(Service):
|
||||
"utf8": "true",
|
||||
}
|
||||
|
||||
response = self.session.post(self.LOGIN_URL, data=login_data, allow_redirects=False)
|
||||
response = self.session.post(
|
||||
self.LOGIN_URL, data=login_data, allow_redirects=False
|
||||
)
|
||||
|
||||
if '<div id="watch-unauthorized"' in response.text:
|
||||
self.log.error("Login failed")
|
||||
@ -58,7 +66,9 @@ class DROP(Service):
|
||||
else:
|
||||
self.log.info("Login successful")
|
||||
else:
|
||||
self.log.info("No login credentials provided, proceeding without authentication")
|
||||
self.log.info(
|
||||
"No login credentials provided, proceeding without authentication"
|
||||
)
|
||||
|
||||
def _get_authenticity_token(self):
|
||||
signin_page = self.session.get(self.LOGIN_URL).text
|
||||
@ -99,14 +109,22 @@ class DROP(Service):
|
||||
episode_link = item.find("a", class_="browse-item-link")
|
||||
if episode_link:
|
||||
episode_url = episode_link["href"]
|
||||
episode_data = json.loads(episode_link["data-track-event-properties"])
|
||||
episode_data = json.loads(
|
||||
episode_link["data-track-event-properties"]
|
||||
)
|
||||
|
||||
episode_id = episode_data["id"]
|
||||
episode_title = episode_data["label"]
|
||||
|
||||
episode_number_elem = item.find("span", class_="media-identifier media-episode")
|
||||
episode_number_elem = item.find(
|
||||
"span", class_="media-identifier media-episode"
|
||||
)
|
||||
episode_number = (
|
||||
int(re.search(r"Episode (\d+)", episode_number_elem.text).group(1))
|
||||
int(
|
||||
re.search(r"Episode (\d+)", episode_number_elem.text).group(
|
||||
1
|
||||
)
|
||||
)
|
||||
if episode_number_elem
|
||||
else None
|
||||
)
|
||||
@ -140,70 +158,109 @@ class DROP(Service):
|
||||
embed_url = embed_url_match.group(1)
|
||||
embed_url = embed_url.replace("&amp;", "&") # Fix HTML entities
|
||||
|
||||
# Fetch the embed page
|
||||
embed_page = self.session.get(embed_url).text
|
||||
# Prepare headers for the embed page request
|
||||
headers = {
|
||||
"Referer": episode_url,
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Sec-Fetch-Dest": "iframe",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "cross-site",
|
||||
}
|
||||
|
||||
# Extract the playlist URL
|
||||
playlist_url_match = re.search(
|
||||
r'"(https://vod-adaptive-ak\.vimeocdn\.com/[^"]+playlist\.json[^"]+)"', embed_page
|
||||
# Fetch the embed page with headers
|
||||
embed_page = self.session.get(embed_url, headers=headers).text
|
||||
|
||||
# Extract the config_url
|
||||
config_url_match = re.search(r'config_url":"([^"]+)"', embed_page)
|
||||
if not config_url_match:
|
||||
raise ValueError("Could not find config_url in the embed page")
|
||||
|
||||
config_url = config_url_match.group(1).replace("\\u0026", "&")
|
||||
|
||||
# Fetch the config data
|
||||
config_data = self.session.get(config_url, headers=headers).json()
|
||||
|
||||
# Get the HLS playlist URL
|
||||
hls_url = config_data["request"]["files"]["hls"]["cdns"][
|
||||
"akfire_interconnect_quic"
|
||||
]["url"]
|
||||
|
||||
# Fetch and parse the HLS playlist
|
||||
hls_tracks = HLS.from_url(url=hls_url, session=self.session).to_tracks(
|
||||
language="en"
|
||||
)
|
||||
if not playlist_url_match:
|
||||
raise ValueError("Could not find playlist URL in the embed page")
|
||||
|
||||
playlist_url = playlist_url_match.group(1)
|
||||
|
||||
# Fetch and parse the playlist JSON
|
||||
playlist_data = self.session.get(playlist_url).json()
|
||||
|
||||
tracks = Tracks()
|
||||
|
||||
# Process video tracks
|
||||
for video_file in playlist_data.get("video", []):
|
||||
# Handle multiple video tracks
|
||||
for video in hls_tracks.videos:
|
||||
tracks.add(
|
||||
Video(
|
||||
id_=video_file["id"],
|
||||
url=video_file["base_url"] + video_file["init_segment"],
|
||||
codec=video_file["codecs"],
|
||||
language="en", # Assuming English as default
|
||||
bitrate=video_file.get("bitrate"),
|
||||
width=video_file.get("width"),
|
||||
height=video_file.get("height"),
|
||||
fps=video_file.get("framerate"),
|
||||
id_=f"video_{video.id}",
|
||||
url=video.url,
|
||||
codec=video.codec,
|
||||
language=video.language,
|
||||
bitrate=video.bitrate,
|
||||
width=video.width,
|
||||
height=video.height,
|
||||
fps=video.fps,
|
||||
)
|
||||
)
|
||||
|
||||
# Process audio tracks
|
||||
for audio_file in playlist_data.get("audio", []):
|
||||
# Handle multiple audio tracks
|
||||
for audio in hls_tracks.audio:
|
||||
tracks.add(
|
||||
Audio(
|
||||
id_=audio_file["id"],
|
||||
url=audio_file["base_url"] + audio_file["init_segment"],
|
||||
codec=audio_file["codecs"],
|
||||
language=audio_file.get("language", "en"), # Assuming English as default if not specified
|
||||
bitrate=audio_file.get("bitrate"),
|
||||
id_=f"audio_{audio.id}",
|
||||
url=audio.url,
|
||||
codec=audio.codec,
|
||||
language=audio.language,
|
||||
bitrate=audio.bitrate,
|
||||
)
|
||||
)
|
||||
|
||||
# Process subtitles
|
||||
for text_track in playlist_data.get("text_tracks", []):
|
||||
if text_track["kind"] == "captions":
|
||||
tracks.add(
|
||||
Subtitle(
|
||||
id_=text_track["id"],
|
||||
url=text_track["url"],
|
||||
codec=Subtitle.Codec.VTT,
|
||||
language=text_track["language"],
|
||||
is_original_lang=text_track.get("lang") == playlist_data.get("default_language"),
|
||||
)
|
||||
# Handle subtitles (if any)
|
||||
for subtitle in hls_tracks.subtitles:
|
||||
tracks.add(
|
||||
Subtitle(
|
||||
id_=f"subtitle_{subtitle.id}",
|
||||
url=subtitle.url,
|
||||
codec=subtitle.codec,
|
||||
language=subtitle.language,
|
||||
)
|
||||
)
|
||||
|
||||
return tracks
|
||||
return Tracks
|
||||
|
||||
def get_chapters(self, title):
    """Return the chapters for *title*.

    This is currently a placeholder: an empty list is returned for every
    title, regardless of the argument.
    """
    # Implement if DROPOUT.tv provides chapter information
    chapters = []
    return chapters
|
||||
|
||||
def get_widevine_license(self, challenge: bytes, title: Union[Episode], track):
    """Placeholder for Widevine license retrieval.

    No license request is made yet; the method ignores its arguments and
    returns ``None``.
    """
    # Implement the logic to fetch the Widevine license.
    # This might involve making a request to a license server.
    return None
|
||||
|
||||
def map_video_codec(self, codec_string: Optional[str]) -> "Optional[Video.Codec]":
    """Translate a codec identifier string into a ``Video.Codec`` member.

    Matching is by prefix and case-insensitive, so a full codecs value such
    as ``"avc1.640028"`` resolves through its leading identifier.

    Args:
        codec_string: Codec identifier to classify. May be ``None`` or
            empty when the source did not declare a codec.

    Returns:
        The matching ``Video.Codec`` member, or ``None`` when the string is
        missing, empty, or starts with no known prefix.
    """
    # A missing codec is "unknown", not an error; bail out before touching
    # the string so ``None`` no longer raises AttributeError.
    if not codec_string:
        return None

    normalized = codec_string.strip().lower()

    # Each codec lists the original short names plus the RFC 6381 sample-entry
    # identifiers that manifests actually carry (e.g. "hvc1", "vp09", "av01").
    prefix_groups = (
        (("avc1", "avc3"), Video.Codec.AVC),
        (("hevc", "hvc1", "hev1"), Video.Codec.HEVC),
        (("vp9", "vp09"), Video.Codec.VP9),
        (("av1", "av01"), Video.Codec.AV1),
    )
    for prefixes, codec in prefix_groups:
        # str.startswith accepts a tuple, so one call covers every alias.
        if normalized.startswith(prefixes):
            return codec
    return None
|
||||
|
||||
def map_audio_codec(self, codec_string: Optional[str]) -> "Optional[Audio.Codec]":
    """Translate a codec identifier string into an ``Audio.Codec`` member.

    Matching is by prefix and case-insensitive, so a full codecs value such
    as ``"mp4a.40.2"`` resolves through its leading identifier and
    ``"Opus"`` is treated the same as ``"opus"``.

    Args:
        codec_string: Codec identifier to classify. May be ``None`` or
            empty when the source did not declare a codec.

    Returns:
        The matching ``Audio.Codec`` member, or ``None`` when the string is
        missing, empty, or starts with no known prefix.
    """
    # A missing codec is "unknown", not an error; bail out before touching
    # the string so ``None`` no longer raises AttributeError.
    if not codec_string:
        return None

    normalized = codec_string.strip().lower()

    prefix_to_codec = (
        ("mp4a", Audio.Codec.AAC),
        ("ec-3", Audio.Codec.EC3),
        ("ac-3", Audio.Codec.AC3),
        ("opus", Audio.Codec.OPUS),
    )
    for prefix, codec in prefix_to_codec:
        if normalized.startswith(prefix):
            return codec
    return None
|
||||
|
Loading…
Reference in New Issue
Block a user