♻️ (DROP): refactor to use HLS for media extraction and improve code readability

2024-09-06 21:09:53 -06:00 · 2024-09-06 21:09:53 -06:00 · 59fbc354bd
commit 59fbc354bd
parent dfb3bece52
1 changed files with 111 additions and 54 deletions
--- a/services/DROP/init.py
+++ b/services/DROP/init.py
@ -1,15 +1,15 @@
 import re
+import json
+import click
 from typing import Optional, Union
 from http.cookiejar import CookieJar
-import json
 from bs4 import BeautifulSoup
-import click

 from devine.core.service import Service
 from devine.core.titles import Episode, Series
 from devine.core.tracks import Tracks, Subtitle, Video, Audio
-from devine.core.manifests import DASH
 from devine.core.credential import Credential
+from devine.core.manifests import HLS


 class DROP(Service):
@ -22,7 +22,9 @@ class DROP(Service):
    # Updated regex to capture anything between / and /season or end of the URL
    TITLE_RE = r"^(?:https?://(?:www\.)?dropout\.tv/)([^/]+)(?:/.*)?$"
    SERIES_RE = r"https?://(?:www\.)?dropout\.tv/([^/]+)(?:/season:(\d+))?/?$"
-    EPISODE_RE = r"https?://(?:www\.)?dropout\.tv/([^/]+)/season:(\d+)/videos/([^/]+)/?$"
+    EPISODE_RE = (
+        r"https?://(?:www\.)?dropout\.tv/([^/]+)/season:(\d+)/videos/([^/]+)/?$"
+    )

    LOGIN_URL = "https://www.dropout.tv/login"

@ -37,7 +39,11 @@ class DROP(Service):
        self.title = title
        super().__init__(ctx)

-    def authenticate(self, cookies: Optional[CookieJar] = None, credential: Optional[Credential] = None) -> None:
+    def authenticate(
+        self,
+        cookies: Optional[CookieJar] = None,
+        credential: Optional[Credential] = None,
+    ) -> None:
        self.credentials = credential

        if cookies:
@ -50,7 +56,9 @@ class DROP(Service):
                "utf8": "true",
            }

-            response = self.session.post(self.LOGIN_URL, data=login_data, allow_redirects=False)
+            response = self.session.post(
+                self.LOGIN_URL, data=login_data, allow_redirects=False
+            )

            if '<div id="watch-unauthorized"' in response.text:
                self.log.error("Login failed")
@ -58,7 +66,9 @@ class DROP(Service):
            else:
                self.log.info("Login successful")
        else:
-            self.log.info("No login credentials provided, proceeding without authentication")
+            self.log.info(
+                "No login credentials provided, proceeding without authentication"
+            )

    def _get_authenticity_token(self):
        signin_page = self.session.get(self.LOGIN_URL).text
@ -99,14 +109,22 @@ class DROP(Service):
                episode_link = item.find("a", class_="browse-item-link")
                if episode_link:
                    episode_url = episode_link["href"]
-                    episode_data = json.loads(episode_link["data-track-event-properties"])
+                    episode_data = json.loads(
+                        episode_link["data-track-event-properties"]
+                    )

                    episode_id = episode_data["id"]
                    episode_title = episode_data["label"]

-                    episode_number_elem = item.find("span", class_="media-identifier media-episode")
+                    episode_number_elem = item.find(
+                        "span", class_="media-identifier media-episode"
+                    )
                    episode_number = (
-                        int(re.search(r"Episode (\d+)", episode_number_elem.text).group(1))
+                        int(
+                            re.search(r"Episode (\d+)", episode_number_elem.text).group(
+                                1
+                            )
+                        )
                        if episode_number_elem
                        else None
                    )
@ -140,70 +158,109 @@ class DROP(Service):
        embed_url = embed_url_match.group(1)
        embed_url = embed_url.replace("&amp;", "&")  # Fix HTML entities

-        # Fetch the embed page
-        embed_page = self.session.get(embed_url).text
+        # Prepare headers for the embed page request
+        headers = {
+            "Referer": episode_url,
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.5",
+            "Upgrade-Insecure-Requests": "1",
+            "Sec-Fetch-Dest": "iframe",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "cross-site",
+        }

-        # Extract the playlist URL
-        playlist_url_match = re.search(
-            r'"(https://vod-adaptive-ak\.vimeocdn\.com/[^"]+playlist\.json[^"]+)"', embed_page
+        # Fetch the embed page with headers
+        embed_page = self.session.get(embed_url, headers=headers).text
+
+        # Extract the config_url
+        config_url_match = re.search(r'config_url":"([^"]+)"', embed_page)
+        if not config_url_match:
+            raise ValueError("Could not find config_url in the embed page")
+
+        config_url = config_url_match.group(1).replace("\\u0026", "&")
+
+        # Fetch the config data
+        config_data = self.session.get(config_url, headers=headers).json()
+
+        # Get the HLS playlist URL
+        hls_url = config_data["request"]["files"]["hls"]["cdns"][
+            "akfire_interconnect_quic"
+        ]["url"]
+
+        # Fetch and parse the HLS playlist
+        hls_tracks = HLS.from_url(url=hls_url, session=self.session).to_tracks(
+            language="en"
        )
-        if not playlist_url_match:
-            raise ValueError("Could not find playlist URL in the embed page")
-
-        playlist_url = playlist_url_match.group(1)
-
-        # Fetch and parse the playlist JSON
-        playlist_data = self.session.get(playlist_url).json()

        tracks = Tracks()

-        # Process video tracks
-        for video_file in playlist_data.get("video", []):
+        # Handle multiple video tracks
+        for video in hls_tracks.videos:
            tracks.add(
                Video(
-                    id_=video_file["id"],
-                    url=video_file["base_url"] + video_file["init_segment"],
-                    codec=video_file["codecs"],
-                    language="en",  # Assuming English as default
-                    bitrate=video_file.get("bitrate"),
-                    width=video_file.get("width"),
-                    height=video_file.get("height"),
-                    fps=video_file.get("framerate"),
+                    id_=f"video_{video.id}",
+                    url=video.url,
+                    codec=video.codec,
+                    language=video.language,
+                    bitrate=video.bitrate,
+                    width=video.width,
+                    height=video.height,
+                    fps=video.fps,
                )
            )

-        # Process audio tracks
-        for audio_file in playlist_data.get("audio", []):
+        # Handle multiple audio tracks
+        for audio in hls_tracks.audio:
            tracks.add(
                Audio(
-                    id_=audio_file["id"],
-                    url=audio_file["base_url"] + audio_file["init_segment"],
-                    codec=audio_file["codecs"],
-                    language=audio_file.get("language", "en"),  # Assuming English as default if not specified
-                    bitrate=audio_file.get("bitrate"),
+                    id_=f"audio_{audio.id}",
+                    url=audio.url,
+                    codec=audio.codec,
+                    language=audio.language,
+                    bitrate=audio.bitrate,
                )
            )

-        # Process subtitles
-        for text_track in playlist_data.get("text_tracks", []):
-            if text_track["kind"] == "captions":
+        # Handle subtitles (if any)
+        for subtitle in hls_tracks.subtitles:
            tracks.add(
                Subtitle(
-                        id_=text_track["id"],
-                        url=text_track["url"],
-                        codec=Subtitle.Codec.VTT,
-                        language=text_track["language"],
-                        is_original_lang=text_track.get("lang") == playlist_data.get("default_language"),
+                    id_=f"subtitle_{subtitle.id}",
+                    url=subtitle.url,
+                    codec=subtitle.codec,
+                    language=subtitle.language,
                )
            )

-        return tracks
+        # Return the populated instance built above, not the Tracks class itself.
+        return tracks

    def get_chapters(self, title):
-        # Implement if DROPOUT.tv provides chapter information
        return []

    def get_widevine_license(self, challenge: bytes, title: Union[Episode], track):
-        # Implement the logic to fetch the Widevine license
-        # This might involve making a request to a license server
        pass
+
+    def map_video_codec(self, codec_string):
+        """Map a codec identifier string to a ``Video.Codec`` member.
+
+        Matching is by case-insensitive prefix, so full RFC 6381 values such
+        as ``avc1.64001f``, ``hvc1.2.4.L123.B0``, ``vp09.00.10.08`` or
+        ``av01.0.04M.08`` resolve as well as the bare short names.
+
+        Args:
+            codec_string: Codec identifier string; may be empty or ``None``.
+
+        Returns:
+            The matching ``Video.Codec`` member, or ``None`` when the input is
+            empty/``None`` or no known prefix matches.
+        """
+        if not codec_string:
+            return None
+
+        codec_map = {
+            "avc1": Video.Codec.AVC,
+            # HEVC sample entries are "hvc1"/"hev1"; "hevc" kept for
+            # backward compatibility with the short name.
+            "hvc1": Video.Codec.HEVC,
+            "hev1": Video.Codec.HEVC,
+            "hevc": Video.Codec.HEVC,
+            # RFC 6381-style VP9 strings start with "vp09", which the short
+            # "vp9" prefix does not match.
+            "vp09": Video.Codec.VP9,
+            "vp9": Video.Codec.VP9,
+            # Likewise AV1 strings start with "av01", not "av1".
+            "av01": Video.Codec.AV1,
+            "av1": Video.Codec.AV1,
+        }
+        normalized = codec_string.lower()
+        for prefix, codec in codec_map.items():
+            if normalized.startswith(prefix):
+                return codec
+        return None
+
+    def map_audio_codec(self, codec_string):
+        """Map a codec identifier string to an ``Audio.Codec`` member.
+
+        Matching is by case-insensitive prefix, so values such as
+        ``mp4a.40.2`` resolve to AAC and both ``opus`` and ``Opus`` resolve
+        to Opus.
+
+        Args:
+            codec_string: Codec identifier string; may be empty or ``None``.
+
+        Returns:
+            The matching ``Audio.Codec`` member, or ``None`` when the input is
+            empty/``None`` or no known prefix matches.
+        """
+        if not codec_string:
+            return None
+
+        codec_map = {
+            "mp4a": Audio.Codec.AAC,
+            "ec-3": Audio.Codec.EC3,
+            "ac-3": Audio.Codec.AC3,
+            "opus": Audio.Codec.OPUS,
+        }
+        # Lower-case once so spellings like "Opus" or "EC-3" still match.
+        normalized = codec_string.lower()
+        for prefix, codec in codec_map.items():
+            if normalized.startswith(prefix):
+                return codec
+        return None