Compare commits


19 Commits

Author SHA1 Message Date
retouching
09eda16882
fix(dl): delete old file after repackage (#114)
* fix(dl): delete old file after repackage

* fix(dl): using original_path instead of self.path in repackage method
2024-06-03 16:57:26 +01:00
rlaphoenix
a95d32de9e chore: Add config to gitignore 2024-05-17 02:29:46 +01:00
rlaphoenix
221cd145c4 refactor(dl): Make Widevine CDM config optional
With this change you no longer have to define/configure a CDM to load. This is something that isn't necessary for a lot of services.

Note: It's also now less hand-holdy in terms of correct config formatting/values. E.g. if you define a CDM by profile for a service slightly incorrectly, say with a typo in the service or profile name, it will no longer warn you.
2024-05-17 01:52:45 +01:00
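For context, a minimal sketch of the lookup order the new get_cdm() uses (service, then profile mapping, then default, else None), as seen in the dl.py diff further down; the service, profile, and CDM names below are invented for illustration:

# hypothetical config.cdm mapping; all names are made up
config_cdm = {
    "default": "generic_l3",        # used when a service has no specific mapping
    "SERVICE_A": "service_a_cdm",   # one CDM for every profile of this service
    "SERVICE_B": {                  # per-profile mapping for this service
        "john": "service_b_cdm_john",
    },
}

def resolve_cdm_name(service, profile=None):
    # mirrors the lookup order in dl.get_cdm(): service -> profile -> default -> None
    name = config_cdm.get(service) or config_cdm.get("default")
    if isinstance(name, dict):
        # a typo in the profile name now silently falls back instead of warning
        name = name.get(profile) or config_cdm.get("default")
    return name or None

print(resolve_cdm_name("SERVICE_B", "john"))  # service_b_cdm_john
print(resolve_cdm_name("SERVICE_B", "jonh"))  # generic_l3 (typo, silent fallback)
print(resolve_cdm_name("SERVICE_C"))          # generic_l3
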
rlaphoenix
0310646cb2 fix(Subtitle): Skip merging segmented WebVTT if only 1 segment 2024-05-17 01:42:44 +01:00
rlaphoenix
3426fc145f fix(HLS): Decrypt AES-encrypted segments separately
We cannot merge all the encrypted AES-128-CBC (ClearKey) segments and then decrypt them in one go because each segment should be padded to a 16-byte boundary in CBC mode.

Since it uses PKCS#5 or #7 style padding (can't remember which), the merged file has a 15-in-16 chance of failing the boundary check. And in the 1-in-16 case where it does pass the boundary check, it still won't decrypt properly, as each segment's padding will be treated as actual data rather than padding.
2024-05-17 01:15:37 +01:00
rlaphoenix
e57d755837 fix(clearkey): Do not pad data before decryption
This is seemingly unnecessary and simply incorrect, at least for two sources (VGTV and TRUTV).

Without this change it is not possible to merge all segments correctly without at least some problem in the resulting file.
2024-05-17 01:00:11 +01:00
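A minimal sketch of the per-segment approach these two ClearKey fixes describe, using the same Cryptodome pad/unpad helpers that appear in the diff further down; the key, IV, and segment contents here are made up:

from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import pad, unpad

key = bytes(16)  # dummy 128-bit key
iv = bytes(16)   # dummy IV

def encrypt_segment(plaintext: bytes) -> bytes:
    # each HLS segment is padded and encrypted on its own
    return AES.new(key, AES.MODE_CBC, iv).encrypt(pad(plaintext, AES.block_size))

segments = [encrypt_segment(b"segment-1 data"), encrypt_segment(b"segment-2 data")]

# decrypt each segment separately, without padding the ciphertext first,
# then strip that segment's own PKCS#7 padding from the plaintext
decrypted = b"".join(
    unpad(AES.new(key, AES.MODE_CBC, iv).decrypt(segment), AES.block_size)
    for segment in segments
)
assert decrypted == b"segment-1 datasegment-2 data"
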
rlaphoenix
03f3fec5cc refactor(dl): Only log errors/warnings from mkvmerge, list after message 2024-05-16 18:12:57 +01:00
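For reference, a small sketch of how mkvmerge --gui-mode lines end up being classified after this change (mirroring the parsing added in the Tracks.mux() diff further down); the sample output lines are invented:

sample_output = [
    "#GUI#progress 42%",
    "#GUI#warning a made-up warning line",
    "#GUI#error a made-up error line",
]

errors = []
for line in sample_output:
    if line.startswith("#GUI#error") or line.startswith("#GUI#warning"):
        errors.append(line)
    elif line.startswith("#GUI#progress"):
        # "#GUI#progress " is 14 characters, the trailing "%" is dropped
        print("progress:", int(line.strip()[14:-1]))
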
rlaphoenix
2acee30e54 fix(utilities): Prevent finding the same box index over and over
Since it removed the data before the found box's index (minus 4), every loop iteration would find the same box again, but this time at index 4, since all preceding data had been removed in the prior iteration. Because the index -= 4 code only runs if the index is greater than 4, it never ran on the second iteration, and since the data then no longer included the box length, Box.parse failed with an IOError.

This change corrects the loop so that each box is found, obtained, and parsed correctly.
2024-05-15 17:54:21 +01:00
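A simplified sketch of the corrected scanning approach described above: track an offset into the buffer so each iteration searches past the boxes already handled, rather than slicing the buffer and re-finding the same box. It reads the 32-bit size field directly instead of using pymp4's Box.parse, and the box data below is made up:

def iter_box_offsets(data: bytes, box_type: bytes):
    offset = 0
    while True:
        try:
            index = data[offset:].index(box_type)
        except ValueError:
            break
        index -= 4  # the 4-byte size field precedes the box type
        box_start = offset + index
        box_size = int.from_bytes(data[box_start:box_start + 4], "big")
        yield box_start
        offset = box_start + box_size  # continue searching after this box

# two fake 12-byte boxes: size (4 bytes) + type (4 bytes) + payload (4 bytes)
data = (
    (12).to_bytes(4, "big") + b"pssh" + b"AAAA" +
    (12).to_bytes(4, "big") + b"pssh" + b"BBBB"
)
print(list(iter_box_offsets(data, b"pssh")))  # [0, 12]
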
rlaphoenix
2e697d93fc fix(dl): Log output from mkvmerge on failure 2024-05-15 14:00:38 +01:00
rlaphoenix
f08402d795 refactor: Warn falling back to requests as aria2c doesn't support Range 2024-05-11 22:59:31 +01:00
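For context on the fallback this warning is about, a ranged request is just a normal GET with a Range header, which the requests-based downloader can send but aria2(c) cannot; the URL and byte range below are placeholders:

import requests

url = "https://example.com/media/init.mp4"  # placeholder URL
response = requests.get(url, headers={"Range": "bytes=0-1023"}, timeout=10)
# a 206 Partial Content status means the server honoured the byte range
print(response.status_code, len(response.content))
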
rlaphoenix
5ef95e942a fix(DASH): Use SegmentTemplate endNumber if available 2024-05-11 22:15:05 +01:00
rlaphoenix
dde55fd708 fix(DASH): Correct SegmentTemplate range stop value
Since range(start, stop) is start-inclusive but stop-exclusive, and a SegmentTemplate's DASH startNumber will typically be 1 or unspecified (defaulting to 1), it effectively worked by coincidence.

However, if startNumber were anything other than 1, there would be a problem.
2024-05-11 22:13:28 +01:00
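A quick illustration of the range() semantics the message above refers to; the numbers are arbitrary:

start_number = 5
segment_count = 4

# range() is start-inclusive and stop-exclusive, so iterating segment numbers
# 5..8 needs a stop of start_number + segment_count, or end_number + 1 when an
# inclusive endNumber is known (as the SegmentTemplate changes further down use)
end_number = start_number + segment_count - 1
assert list(range(start_number, start_number + segment_count)) == [5, 6, 7, 8]
assert list(range(start_number, end_number + 1)) == [5, 6, 7, 8]
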
rlaphoenix
345cc5aba6
Merge pull request #110 from adbbbb/master
Adding Arm64 OSX Shaka support
2024-05-11 20:13:30 +01:00
rlaphoenix
145e7a6c17 docs(contributors): Add adbbbb to Contributor list 2024-05-11 20:13:01 +01:00
Adam
5706bb1417 fix(binaries): Search for Arm64 builds of Shaka-Packager 2024-05-11 20:11:29 +01:00
rlaphoenix
85246ab419
Merge pull request #109 from pandamoon21/master
Fix uppercase letters in the fonts extension - Font attachment
2024-05-11 17:46:04 +01:00
rlaphoenix
71a3a4e2c4 docs(contributors): Add pandamoon21 to Contributor list 2024-05-11 17:45:10 +01:00
pandamoon21
06d414975c fix(Attachment): Check mime-type case-insensitively 2024-05-11 17:43:32 +01:00
rlaphoenix
f419e04fad refactor(Track): Ensure data property is a defaultdict with dict factory
This is so both internal code and service code can save data to sub-keys without the parent keys needing to exist.

A doc-string is now set on the data property, denoting some keys as reserved and documenting their typing and meaning.

This also fixes a bug introduced in v3.3.3 where it would fail to download tracks without the "hls" key in the data property. This can happen when manually making Audio tracks using the HLS descriptor without putting any of the hls data that the HLS class sets in to_tracks().
2024-05-09 15:15:22 +01:00
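A minimal sketch of why a defaultdict with a dict factory helps here; the key names mirror the reserved keys documented in the new data property further down:

from collections import defaultdict

data = defaultdict(dict)

# sub-keys can be written without the parent key needing to exist...
data["dash"]["timescale"] = 90000

# ...and reading a missing parent no longer raises KeyError, it just yields {}
if not data["hls"].get("segment_durations"):
    print("no HLS segment durations stored")

# a plain dict assigned to the property is converted by the new setter, roughly:
data = defaultdict(dict, **{"dash": {"timescale": 90000}})
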
12 changed files with 129 additions and 51 deletions

.gitignore

@@ -1,4 +1,6 @@
 # devine
+devine.yaml
+devine.yml
 *.mkv
 *.mp4
 *.exe


@@ -343,6 +343,8 @@ Please refrain from spam or asking for questions that infringe upon a Service's
 <a href="https://github.com/Shivelight"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/20620780?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="Shivelight"/></a>
 <a href="https://github.com/knowhere01"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/113712042?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="knowhere01"/></a>
 <a href="https://github.com/retouching"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/33735357?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="retouching"/></a>
+<a href="https://github.com/pandamoon21"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/33972938?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="pandamoon21"/></a>
+<a href="https://github.com/adbbbb"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/56319336?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="adbbbb"/></a>
 ## Licensing


@@ -178,9 +178,10 @@ class dl:
     except ValueError as e:
         self.log.error(f"Failed to load Widevine CDM, {e}")
         sys.exit(1)
-    self.log.info(
-        f"Loaded {self.cdm.__class__.__name__} Widevine CDM: {self.cdm.system_id} (L{self.cdm.security_level})"
-    )
+    if self.cdm:
+        self.log.info(
+            f"Loaded {self.cdm.__class__.__name__} Widevine CDM: {self.cdm.system_id} (L{self.cdm.security_level})"
+        )
 with console.status("Loading Key Vaults...", spinner="dots"):
     self.vaults = Vaults(self.service)
@@ -701,16 +702,22 @@
 ):
     for task_id, task_tracks in multiplex_tasks:
         progress.start_task(task_id) # TODO: Needed?
-        muxed_path, return_code = task_tracks.mux(
+        muxed_path, return_code, errors = task_tracks.mux(
             str(title),
             progress=partial(progress.update, task_id=task_id),
             delete=False
         )
         muxed_paths.append(muxed_path)
-        if return_code == 1:
-            self.log.warning("mkvmerge had at least one warning, will continue anyway...")
-        elif return_code >= 2:
-            self.log.error(f"Failed to Mux video to Matroska file ({return_code})")
+        if return_code >= 2:
+            self.log.error(f"Failed to Mux video to Matroska file ({return_code}):")
+        elif return_code == 1 or errors:
+            self.log.warning("mkvmerge had at least one warning or error, continuing anyway...")
+        for line in errors:
+            if line.startswith("#GUI#error"):
+                self.log.error(line)
+            else:
+                self.log.warning(line)
+        if return_code >= 2:
             sys.exit(1)
         for video_track in task_tracks.videos:
             video_track.delete()
@@ -930,21 +937,21 @@
     return Credential.loads(credentials) # type: ignore
 @staticmethod
-def get_cdm(service: str, profile: Optional[str] = None) -> WidevineCdm:
+def get_cdm(service: str, profile: Optional[str] = None) -> Optional[WidevineCdm]:
     """
     Get CDM for a specified service (either Local or Remote CDM).
     Raises a ValueError if there's a problem getting a CDM.
     """
     cdm_name = config.cdm.get(service) or config.cdm.get("default")
     if not cdm_name:
-        raise ValueError("A CDM to use wasn't listed in the config")
+        return None
     if isinstance(cdm_name, dict):
         if not profile:
-            raise ValueError("CDM config is mapped for profiles, but no profile was chosen")
+            return None
         cdm_name = cdm_name.get(profile) or config.cdm.get("default")
         if not cdm_name:
-            raise ValueError(f"A CDM to use was not mapped for the profile {profile}")
+            return None
     cdm_api = next(iter(x for x in config.remote_cdm if x["name"] == cdm_name), None)
     if cdm_api:


@@ -26,6 +26,7 @@ ShakaPackager = find(
     "shaka-packager",
     "packager",
     f"packager-{__shaka_platform}",
+    f"packager-{__shaka_platform}-arm64",
     f"packager-{__shaka_platform}-x64"
 )
 Aria2 = find("aria2c", "aria2")


@@ -7,7 +7,7 @@ from typing import Optional, Union
 from urllib.parse import urljoin
 from Cryptodome.Cipher import AES
-from Cryptodome.Util.Padding import pad, unpad
+from Cryptodome.Util.Padding import unpad
 from m3u8.model import Key
 from requests import Session
@@ -43,7 +43,7 @@ class ClearKey:
         decrypted = AES. \
             new(self.key, AES.MODE_CBC, self.iv). \
-            decrypt(pad(path.read_bytes(), AES.block_size))
+            decrypt(path.read_bytes())
         try:
             decrypted = unpad(decrypted, AES.block_size)


@@ -292,6 +292,7 @@ class DASH:
         if segment_template is not None:
             segment_template = copy(segment_template)
             start_number = int(segment_template.get("startNumber") or 1)
+            end_number = int(segment_template.get("endNumber") or 0) or None
             segment_timeline = segment_template.find("SegmentTimeline")
             segment_timescale = float(segment_template.get("timescale") or 1)
@@ -328,9 +329,11 @@
                 for _ in range(1 + (int(s.get("r") or 0))):
                     segment_durations.append(current_time)
                     current_time += int(s.get("d"))
-                seg_num_list = list(range(start_number, len(segment_durations) + start_number))
-                for t, n in zip(segment_durations, seg_num_list):
+                if not end_number:
+                    end_number = len(segment_durations)
+                for t, n in zip(segment_durations, range(start_number, end_number + 1)):
                     segments.append((
                         DASH.replace_fields(
                             segment_template.get("media"),
@@ -345,9 +348,11 @@
                     raise ValueError("Duration of the Period was unable to be determined.")
                 period_duration = DASH.pt_to_sec(period_duration)
                 segment_duration = float(segment_template.get("duration")) or 1
-                total_segments = math.ceil(period_duration / (segment_duration / segment_timescale))
-                for s in range(start_number, start_number + total_segments):
+                if not end_number:
+                    end_number = math.ceil(period_duration / (segment_duration / segment_timescale))
+                for s in range(start_number, end_number + 1):
                     segments.append((
                         DASH.replace_fields(
                             segment_template.get("media"),
@@ -467,6 +472,7 @@ class DASH:
         if downloader.__name__ == "aria2c" and any(bytes_range is not None for url, bytes_range in segments):
             # aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
             downloader = requests_downloader
+            log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")
         for status_update in downloader(
             urls=[


@@ -254,6 +254,12 @@ class HLS:
         progress(total=total_segments)
         downloader = track.downloader
+        if (
+            downloader.__name__ == "aria2c" and
+            any(x.byterange for x in master.segments if x not in unwanted_segments)
+        ):
+            downloader = requests_downloader
+            log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")
         urls: list[dict[str, Any]] = []
         segment_durations: list[int] = []
@@ -266,9 +272,6 @@
             segment_durations.append(int(segment.duration))
             if segment.byterange:
-                if downloader.__name__ == "aria2c":
-                    # aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
-                    downloader = requests_downloader
                 byte_range = HLS.calculate_byte_range(segment.byterange, range_offset)
                 range_offset = byte_range.split("-")[0]
             else:
@@ -384,15 +387,27 @@
             elif len(files) != range_len:
                 raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
-            merge(
-                to=merged_path,
-                via=files,
-                delete=True,
-                include_map_data=True
-            )
-            drm.decrypt(merged_path)
-            merged_path.rename(decrypted_path)
+            if isinstance(drm, Widevine):
+                # with widevine we can merge all segments and decrypt once
+                merge(
+                    to=merged_path,
+                    via=files,
+                    delete=True,
+                    include_map_data=True
+                )
+                drm.decrypt(merged_path)
+                merged_path.rename(decrypted_path)
+            else:
+                # with other drm we must decrypt separately and then merge them
+                # for aes this is because each segment likely has 16-byte padding
+                for file in files:
+                    drm.decrypt(file)
+                merge(
+                    to=merged_path,
+                    via=files,
+                    delete=True,
+                    include_map_data=True
+                )
             events.emit(
                 events.Types.TRACK_DECRYPTED,


@@ -37,7 +37,7 @@ class Attachment:
         mime_type = {
             ".ttf": "application/x-truetype-font",
             ".otf": "application/vnd.ms-opentype"
-        }.get(path.suffix, mimetypes.guess_type(path)[0])
+        }.get(path.suffix.lower(), mimetypes.guess_type(path)[0])
         if not mime_type:
             raise ValueError("The attachment mime-type could not be automatically detected.")


@@ -206,17 +206,19 @@ class Subtitle(Track):
         elif self.codec == Subtitle.Codec.WebVTT:
             text = self.path.read_text("utf8")
             if self.descriptor == Track.Descriptor.DASH:
-                text = merge_segmented_webvtt(
-                    text,
-                    segment_durations=self.data["dash"]["segment_durations"],
-                    timescale=self.data["dash"]["timescale"]
-                )
+                if len(self.data["dash"]["segment_durations"]) > 1:
+                    text = merge_segmented_webvtt(
+                        text,
+                        segment_durations=self.data["dash"]["segment_durations"],
+                        timescale=self.data["dash"]["timescale"]
+                    )
             elif self.descriptor == Track.Descriptor.HLS:
-                text = merge_segmented_webvtt(
-                    text,
-                    segment_durations=self.data["hls"]["segment_durations"],
-                    timescale=1 # ?
-                )
+                if len(self.data["hls"]["segment_durations"]) > 1:
+                    text = merge_segmented_webvtt(
+                        text,
+                        segment_durations=self.data["hls"]["segment_durations"],
+                        timescale=1 # ?
+                    )
             caption_set = pycaption.WebVTTReader().read(text)
             Subtitle.merge_same_cues(caption_set)
             subtitle_text = pycaption.WebVTTWriter().write(caption_set)


@@ -4,6 +4,7 @@ import logging
 import re
 import shutil
 import subprocess
+from collections import defaultdict
 from copy import copy
 from enum import Enum
 from functools import partial
@@ -42,7 +43,7 @@ class Track:
         drm: Optional[Iterable[DRM_T]] = None,
         edition: Optional[str] = None,
         downloader: Optional[Callable] = None,
-        data: Optional[dict] = None,
+        data: Optional[Union[dict, defaultdict]] = None,
         id_: Optional[str] = None,
     ) -> None:
         if not isinstance(url, (str, list)):
@@ -63,8 +64,8 @@
             raise TypeError(f"Expected edition to be a {str}, not {type(edition)}")
         if not isinstance(downloader, (Callable, type(None))):
             raise TypeError(f"Expected downloader to be a {Callable}, not {type(downloader)}")
-        if not isinstance(data, (dict, type(None))):
-            raise TypeError(f"Expected data to be a {dict}, not {type(data)}")
+        if not isinstance(data, (dict, defaultdict, type(None))):
+            raise TypeError(f"Expected data to be a {dict} or {defaultdict}, not {type(data)}")
         invalid_urls = ", ".join(set(type(x) for x in url if not isinstance(x, str)))
         if invalid_urls:
@@ -93,6 +94,7 @@
         self.drm = drm
         self.edition: str = edition
         self.downloader = downloader
+        self._data: defaultdict[Any, Any] = defaultdict(dict)
         self.data = data or {}
         if self.name is None:
@@ -132,6 +134,42 @@
     def __eq__(self, other: Any) -> bool:
         return isinstance(other, Track) and self.id == other.id
+    @property
+    def data(self) -> defaultdict[Any, Any]:
+        """
+        Arbitrary track data dictionary.
+        A defaultdict is used with a dict as the factory for easier
+        nested saving and safer exists-checks.
+        Reserved keys:
+        - "hls" used by the HLS class.
+          - playlist: m3u8.model.Playlist - The primary track information.
+          - media: m3u8.model.Media - The audio/subtitle track information.
+          - segment_durations: list[int] - A list of each segment's duration.
+        - "dash" used by the DASH class.
+          - manifest: lxml.ElementTree - DASH MPD manifest.
+          - period: lxml.Element - The period of this track.
+          - adaptation_set: lxml.Element - The adaptation set of this track.
+          - representation: lxml.Element - The representation of this track.
+          - timescale: int - The timescale of the track's segments.
+          - segment_durations: list[int] - A list of each segment's duration.
+        You should not add, change, or remove any data within reserved keys.
+        You may use their data but do note that the values of them may change
+        or be removed at any point.
+        """
+        return self._data
+    @data.setter
+    def data(self, value: Union[dict, defaultdict]) -> None:
+        if not isinstance(value, (dict, defaultdict)):
+            raise TypeError(f"Expected data to be a {dict} or {defaultdict}, not {type(value)}")
+        if isinstance(value, dict):
+            value = defaultdict(dict, **value)
+        self._data = value
     def download(
         self,
         session: Session,
@@ -504,6 +542,7 @@
             else:
                 raise
+        original_path.unlink()
         self.path = output_path


@@ -316,7 +316,7 @@ class Tracks:
         ][:per_language or None])
         return selected
-    def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]:
+    def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int, list[str]]:
         """
         Multiplex all the Tracks into a Matroska Container file.
@@ -410,15 +410,18 @@
         # let potential failures go to caller, caller should handle
         try:
+            errors = []
             p = subprocess.Popen([
                 *cl,
                 "--output", str(output_path),
                 "--gui-mode"
            ], text=True, stdout=subprocess.PIPE)
             for line in iter(p.stdout.readline, ""):
+                if line.startswith("#GUI#error") or line.startswith("#GUI#warning"):
+                    errors.append(line)
                 if "progress" in line:
                     progress(total=100, completed=int(line.strip()[14:-1]))
-            return output_path, p.wait()
+            return output_path, p.wait(), errors
         finally:
             if chapters_path:
                 # regardless of delete param, we delete as it's a file we made during muxing


@@ -123,18 +123,18 @@ def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
     # since it doesn't care what child box the wanted box is from, this works fine.
     if not isinstance(data, (bytes, bytearray)):
         raise ValueError("data must be bytes")
+    offset = 0
     while True:
         try:
-            index = data.index(box_type)
+            index = data[offset:].index(box_type)
         except ValueError:
             break
         if index < 0:
             break
-        if index > 4:
-            index -= 4 # size is before box type and is 4 bytes long
-            data = data[index:]
+        index -= 4 # size is before box type and is 4 bytes long
         try:
-            box = Box.parse(data)
+            box = Box.parse(data[offset:][index:])
         except IOError:
             # since get_init_segment might cut off unexpectedly, pymp4 may be unable to read
             # the expected amounts of data and complain, so let's just end the function here
@@ -147,6 +147,7 @@ def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
             raise e
         if as_bytes:
             box = Box.build(box)
+        offset += index + len(Box.build(box))
         yield box