Compare commits


19 Commits

Author SHA1 Message Date
retouching
09eda16882
fix(dl): delete old file after repackage (#114)
* fix(dl): delete old file after repackage

* fix(dl): using original_path instead of self.path in repackage method
2024-06-03 16:57:26 +01:00
rlaphoenix
a95d32de9e chore: Add config to gitignore 2024-05-17 02:29:46 +01:00
rlaphoenix
221cd145c4 refactor(dl): Make Widevine CDM config optional
With this change you no longer have to define/configure a CDM to load. This is something that isn't necessary for a lot of services.

Note: It's also now less hand-holdy in terms of correct config formatting/values. E.g. if you define a CDM by profile for a service slightly incorrectly, say with a typo in the service or profile name, it will no longer warn you.
2024-05-17 01:52:45 +01:00
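For context, a minimal sketch of the lookup order the new get_cdm() uses (service, then profile mapping, then default, else None), as seen in the dl.py diff further down; the service, profile, and CDM names below are invented for illustration:

# hypothetical config.cdm mapping; all names are made up
config_cdm = {
    "default": "generic_l3",        # used when a service has no specific mapping
    "SERVICE_A": "service_a_cdm",   # one CDM for every profile of this service
    "SERVICE_B": {                  # per-profile mapping for this service
        "john": "service_b_cdm_john",
    },
}

def resolve_cdm_name(service, profile=None):
    # mirrors the lookup order in dl.get_cdm(): service -> profile -> default -> None
    name = config_cdm.get(service) or config_cdm.get("default")
    if isinstance(name, dict):
        # a typo in the profile name now silently falls back instead of warning
        name = name.get(profile) or config_cdm.get("default")
    return name or None

print(resolve_cdm_name("SERVICE_B", "john"))  # service_b_cdm_john
print(resolve_cdm_name("SERVICE_B", "jonh"))  # generic_l3 (typo, silent fallback)
print(resolve_cdm_name("SERVICE_C"))          # generic_l3
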
rlaphoenix
0310646cb2 fix(Subtitle): Skip merging segmented WebVTT if only 1 segment 2024-05-17 01:42:44 +01:00
rlaphoenix
3426fc145f fix(HLS): Decrypt AES-encrypted segments separately
We cannot merge all the encrypted AES-128-CBC (ClearKey) segments and then decrypt them in one go because each segment should be padded to a 16-byte boundary in CBC mode.

Since it uses PKCS#5 or #7 style padding (can't remember which), the merged file has a 15-in-16 chance of failing the boundary check. And in the 1-in-16 case where it does pass the boundary check, it still won't decrypt properly, as each segment's padding will be treated as actual data rather than padding.
2024-05-17 01:15:37 +01:00
rlaphoenix
e57d755837 fix(clearkey): Do not pad data before decryption
This is seemingly unnecessary and simply incorrect, at least for two sources (VGTV and TRUTV).

Without this change it is not possible to merge all segments correctly without at least some problem in the resulting file.
2024-05-17 01:00:11 +01:00
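A minimal sketch of the per-segment approach these two ClearKey fixes describe, using the same Cryptodome pad/unpad helpers that appear in the diff further down; the key, IV, and segment contents here are made up:

from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import pad, unpad

key = bytes(16)  # dummy 128-bit key
iv = bytes(16)   # dummy IV

def encrypt_segment(plaintext: bytes) -> bytes:
    # each HLS segment is padded and encrypted on its own
    return AES.new(key, AES.MODE_CBC, iv).encrypt(pad(plaintext, AES.block_size))

segments = [encrypt_segment(b"segment-1 data"), encrypt_segment(b"segment-2 data")]

# decrypt each segment separately, without padding the ciphertext first,
# then strip that segment's own PKCS#7 padding from the plaintext
decrypted = b"".join(
    unpad(AES.new(key, AES.MODE_CBC, iv).decrypt(segment), AES.block_size)
    for segment in segments
)
assert decrypted == b"segment-1 datasegment-2 data"
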
rlaphoenix
03f3fec5cc refactor(dl): Only log errors/warnings from mkvmerge, list after message 2024-05-16 18:12:57 +01:00
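For reference, a small sketch of how mkvmerge --gui-mode lines end up being classified after this change (mirroring the parsing added in the Tracks.mux() diff further down); the sample output lines are invented:

sample_output = [
    "#GUI#progress 42%",
    "#GUI#warning a made-up warning line",
    "#GUI#error a made-up error line",
]

errors = []
for line in sample_output:
    if line.startswith("#GUI#error") or line.startswith("#GUI#warning"):
        errors.append(line)
    elif line.startswith("#GUI#progress"):
        # "#GUI#progress " is 14 characters, the trailing "%" is dropped
        print("progress:", int(line.strip()[14:-1]))
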
rlaphoenix
2acee30e54 fix(utilities): Prevent finding the same box index over and over
Since it removed the data before the found box's index (minus 4), every loop iteration would find the same box again, but this time at index 4, since all preceding data had been removed in the prior iteration. Because the index -= 4 code only runs if the index is greater than 4, it never ran on the second iteration, and since the data then no longer included the box length, Box.parse failed with an IOError.

This change corrects the loop so that each box is found, obtained, and parsed correctly.
2024-05-15 17:54:21 +01:00
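A simplified sketch of the corrected scanning approach described above: track an offset into the buffer so each iteration searches past the boxes already handled, rather than slicing the buffer and re-finding the same box. It reads the 32-bit size field directly instead of using pymp4's Box.parse, and the box data below is made up:

def iter_box_offsets(data: bytes, box_type: bytes):
    offset = 0
    while True:
        try:
            index = data[offset:].index(box_type)
        except ValueError:
            break
        index -= 4  # the 4-byte size field precedes the box type
        box_start = offset + index
        box_size = int.from_bytes(data[box_start:box_start + 4], "big")
        yield box_start
        offset = box_start + box_size  # continue searching after this box

# two fake 12-byte boxes: size (4 bytes) + type (4 bytes) + payload (4 bytes)
data = (
    (12).to_bytes(4, "big") + b"pssh" + b"AAAA" +
    (12).to_bytes(4, "big") + b"pssh" + b"BBBB"
)
print(list(iter_box_offsets(data, b"pssh")))  # [0, 12]
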
rlaphoenix
2e697d93fc fix(dl): Log output from mkvmerge on failure 2024-05-15 14:00:38 +01:00
rlaphoenix
f08402d795 refactor: Warn falling back to requests as aria2c doesn't support Range 2024-05-11 22:59:31 +01:00
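For context on the fallback this warning is about, a ranged request is just a normal GET with a Range header, which the requests-based downloader can send but aria2(c) cannot; the URL and byte range below are placeholders:

import requests

url = "https://example.com/media/init.mp4"  # placeholder URL
response = requests.get(url, headers={"Range": "bytes=0-1023"}, timeout=10)
# a 206 Partial Content status means the server honoured the byte range
print(response.status_code, len(response.content))
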
rlaphoenix
5ef95e942a fix(DASH): Use SegmentTemplate endNumber if available 2024-05-11 22:15:05 +01:00
rlaphoenix
dde55fd708 fix(DASH): Correct SegmentTemplate range stop value
Since range(start, stop) is start-inclusive but stop-exclusive, and a SegmentTemplate's DASH startNumber will typically be 1 or unspecified (defaulting to 1), it effectively worked by coincidence.

However, if startNumber were anything other than 1, there would be a problem.
2024-05-11 22:13:28 +01:00
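A quick illustration of the range() semantics the message above refers to; the numbers are arbitrary:

start_number = 5
segment_count = 4

# range() is start-inclusive and stop-exclusive, so iterating segment numbers
# 5..8 needs a stop of start_number + segment_count, or end_number + 1 when an
# inclusive endNumber is known (as the SegmentTemplate changes further down use)
end_number = start_number + segment_count - 1
assert list(range(start_number, start_number + segment_count)) == [5, 6, 7, 8]
assert list(range(start_number, end_number + 1)) == [5, 6, 7, 8]
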
rlaphoenix
345cc5aba6
Merge pull request #110 from adbbbb/master
Adding Arm64 OSX Shaka support
2024-05-11 20:13:30 +01:00
rlaphoenix
145e7a6c17 docs(contributors): Add adbbbb to Contributor list 2024-05-11 20:13:01 +01:00
Adam
5706bb1417 fix(binaries): Search for Arm64 builds of Shaka-Packager 2024-05-11 20:11:29 +01:00
rlaphoenix
85246ab419
Merge pull request #109 from pandamoon21/master
Fix uppercase letters in the fonts extension - Font attachment
2024-05-11 17:46:04 +01:00
rlaphoenix
71a3a4e2c4 docs(contributors): Add pandamoon21 to Contributor list 2024-05-11 17:45:10 +01:00
pandamoon21
06d414975c fix(Attachment): Check mime-type case-insensitively 2024-05-11 17:43:32 +01:00
rlaphoenix
f419e04fad refactor(Track): Ensure data property is a defaultdict with dict factory
This is so both internal code and service code can save data to sub-keys without the parent keys needing to exist.

A doc-string is now set on the data property, denoting some keys as reserved and documenting their typing and meaning.

This also fixes a bug introduced in v3.3.3 where it would fail to download tracks without the "hls" key in the data property. This can happen when manually making Audio tracks using the HLS descriptor without putting any of the hls data that the HLS class sets in to_tracks().
2024-05-09 15:15:22 +01:00
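A minimal sketch of why a defaultdict with a dict factory helps here; the key names mirror the reserved keys documented in the new data property further down:

from collections import defaultdict

data = defaultdict(dict)

# sub-keys can be written without the parent key needing to exist...
data["dash"]["timescale"] = 90000

# ...and reading a missing parent no longer raises KeyError, it just yields {}
if not data["hls"].get("segment_durations"):
    print("no HLS segment durations stored")

# a plain dict assigned to the property is converted by the new setter, roughly:
data = defaultdict(dict, **{"dash": {"timescale": 90000}})
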
12 changed files with 129 additions and 51 deletions

.gitignore

@@ -1,4 +1,6 @@
 # devine
+devine.yaml
+devine.yml
 *.mkv
 *.mp4
 *.exe


@@ -343,6 +343,8 @@ Please refrain from spam or asking for questions that infringe upon a Service's
 <a href="https://github.com/Shivelight"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/20620780?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="Shivelight"/></a>
 <a href="https://github.com/knowhere01"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/113712042?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="knowhere01"/></a>
 <a href="https://github.com/retouching"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/33735357?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="retouching"/></a>
+<a href="https://github.com/pandamoon21"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/33972938?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="pandamoon21"/></a>
+<a href="https://github.com/adbbbb"><img src="https://images.weserv.nl/?url=avatars.githubusercontent.com/u/56319336?v=4&h=25&w=25&fit=cover&mask=circle&maxage=7d" alt="adbbbb"/></a>
 ## Licensing


@@ -178,9 +178,10 @@ class dl:
     except ValueError as e:
         self.log.error(f"Failed to load Widevine CDM, {e}")
         sys.exit(1)
-    self.log.info(
-        f"Loaded {self.cdm.__class__.__name__} Widevine CDM: {self.cdm.system_id} (L{self.cdm.security_level})"
-    )
+    if self.cdm:
+        self.log.info(
+            f"Loaded {self.cdm.__class__.__name__} Widevine CDM: {self.cdm.system_id} (L{self.cdm.security_level})"
+        )
 with console.status("Loading Key Vaults...", spinner="dots"):
     self.vaults = Vaults(self.service)
@@ -701,16 +702,22 @@
 ):
     for task_id, task_tracks in multiplex_tasks:
         progress.start_task(task_id) # TODO: Needed?
-        muxed_path, return_code = task_tracks.mux(
+        muxed_path, return_code, errors = task_tracks.mux(
             str(title),
             progress=partial(progress.update, task_id=task_id),
             delete=False
         )
         muxed_paths.append(muxed_path)
-        if return_code == 1:
-            self.log.warning("mkvmerge had at least one warning, will continue anyway...")
-        elif return_code >= 2:
-            self.log.error(f"Failed to Mux video to Matroska file ({return_code})")
+        if return_code >= 2:
+            self.log.error(f"Failed to Mux video to Matroska file ({return_code}):")
+        elif return_code == 1 or errors:
+            self.log.warning("mkvmerge had at least one warning or error, continuing anyway...")
+        for line in errors:
+            if line.startswith("#GUI#error"):
+                self.log.error(line)
+            else:
+                self.log.warning(line)
+        if return_code >= 2:
             sys.exit(1)
         for video_track in task_tracks.videos:
             video_track.delete()
@@ -930,21 +937,21 @@
     return Credential.loads(credentials) # type: ignore
 @staticmethod
-def get_cdm(service: str, profile: Optional[str] = None) -> WidevineCdm:
+def get_cdm(service: str, profile: Optional[str] = None) -> Optional[WidevineCdm]:
     """
     Get CDM for a specified service (either Local or Remote CDM).
     Raises a ValueError if there's a problem getting a CDM.
     """
     cdm_name = config.cdm.get(service) or config.cdm.get("default")
     if not cdm_name:
-        raise ValueError("A CDM to use wasn't listed in the config")
+        return None
     if isinstance(cdm_name, dict):
         if not profile:
-            raise ValueError("CDM config is mapped for profiles, but no profile was chosen")
+            return None
         cdm_name = cdm_name.get(profile) or config.cdm.get("default")
         if not cdm_name:
-            raise ValueError(f"A CDM to use was not mapped for the profile {profile}")
+            return None
     cdm_api = next(iter(x for x in config.remote_cdm if x["name"] == cdm_name), None)
     if cdm_api:


@@ -26,6 +26,7 @@ ShakaPackager = find(
     "shaka-packager",
     "packager",
     f"packager-{__shaka_platform}",
+    f"packager-{__shaka_platform}-arm64",
     f"packager-{__shaka_platform}-x64"
 )
 Aria2 = find("aria2c", "aria2")


@@ -7,7 +7,7 @@ from typing import Optional, Union
 from urllib.parse import urljoin
 from Cryptodome.Cipher import AES
-from Cryptodome.Util.Padding import pad, unpad
+from Cryptodome.Util.Padding import unpad
 from m3u8.model import Key
 from requests import Session
@@ -43,7 +43,7 @@ class ClearKey:
         decrypted = AES. \
             new(self.key, AES.MODE_CBC, self.iv). \
-            decrypt(pad(path.read_bytes(), AES.block_size))
+            decrypt(path.read_bytes())
         try:
             decrypted = unpad(decrypted, AES.block_size)


@@ -292,6 +292,7 @@ class DASH:
         if segment_template is not None:
             segment_template = copy(segment_template)
             start_number = int(segment_template.get("startNumber") or 1)
+            end_number = int(segment_template.get("endNumber") or 0) or None
             segment_timeline = segment_template.find("SegmentTimeline")
             segment_timescale = float(segment_template.get("timescale") or 1)
@@ -328,9 +329,11 @@
                 for _ in range(1 + (int(s.get("r") or 0))):
                     segment_durations.append(current_time)
                     current_time += int(s.get("d"))
-                seg_num_list = list(range(start_number, len(segment_durations) + start_number))
-                for t, n in zip(segment_durations, seg_num_list):
+                if not end_number:
+                    end_number = len(segment_durations)
+                for t, n in zip(segment_durations, range(start_number, end_number + 1)):
                     segments.append((
                         DASH.replace_fields(
                             segment_template.get("media"),
@@ -345,9 +348,11 @@
                     raise ValueError("Duration of the Period was unable to be determined.")
                 period_duration = DASH.pt_to_sec(period_duration)
                 segment_duration = float(segment_template.get("duration")) or 1
-                total_segments = math.ceil(period_duration / (segment_duration / segment_timescale))
-                for s in range(start_number, start_number + total_segments):
+                if not end_number:
+                    end_number = math.ceil(period_duration / (segment_duration / segment_timescale))
+                for s in range(start_number, end_number + 1):
                     segments.append((
                         DASH.replace_fields(
                             segment_template.get("media"),
@@ -467,6 +472,7 @@ class DASH:
         if downloader.__name__ == "aria2c" and any(bytes_range is not None for url, bytes_range in segments):
             # aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
             downloader = requests_downloader
+            log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")
         for status_update in downloader(
             urls=[


@@ -254,6 +254,12 @@ class HLS:
         progress(total=total_segments)
         downloader = track.downloader
+        if (
+            downloader.__name__ == "aria2c" and
+            any(x.byterange for x in master.segments if x not in unwanted_segments)
+        ):
+            downloader = requests_downloader
+            log.warning("Falling back to the requests downloader as aria2(c) doesn't support the Range header")
         urls: list[dict[str, Any]] = []
         segment_durations: list[int] = []
@@ -266,9 +272,6 @@
             segment_durations.append(int(segment.duration))
             if segment.byterange:
-                if downloader.__name__ == "aria2c":
-                    # aria2(c) is shit and doesn't support the Range header, fallback to the requests downloader
-                    downloader = requests_downloader
                 byte_range = HLS.calculate_byte_range(segment.byterange, range_offset)
                 range_offset = byte_range.split("-")[0]
             else:
@@ -384,15 +387,27 @@
             elif len(files) != range_len:
                 raise ValueError(f"Missing {range_len - len(files)} segment files for {segment_range}...")
-            merge(
-                to=merged_path,
-                via=files,
-                delete=True,
-                include_map_data=True
-            )
-            drm.decrypt(merged_path)
-            merged_path.rename(decrypted_path)
+            if isinstance(drm, Widevine):
+                # with widevine we can merge all segments and decrypt once
+                merge(
+                    to=merged_path,
+                    via=files,
+                    delete=True,
+                    include_map_data=True
+                )
+                drm.decrypt(merged_path)
+                merged_path.rename(decrypted_path)
+            else:
+                # with other drm we must decrypt separately and then merge them
+                # for aes this is because each segment likely has 16-byte padding
+                for file in files:
+                    drm.decrypt(file)
+                merge(
+                    to=merged_path,
+                    via=files,
+                    delete=True,
+                    include_map_data=True
+                )
             events.emit(
                 events.Types.TRACK_DECRYPTED,


@@ -37,7 +37,7 @@ class Attachment:
         mime_type = {
             ".ttf": "application/x-truetype-font",
             ".otf": "application/vnd.ms-opentype"
-        }.get(path.suffix, mimetypes.guess_type(path)[0])
+        }.get(path.suffix.lower(), mimetypes.guess_type(path)[0])
         if not mime_type:
             raise ValueError("The attachment mime-type could not be automatically detected.")


@@ -206,17 +206,19 @@ class Subtitle(Track):
         elif self.codec == Subtitle.Codec.WebVTT:
             text = self.path.read_text("utf8")
             if self.descriptor == Track.Descriptor.DASH:
-                text = merge_segmented_webvtt(
-                    text,
-                    segment_durations=self.data["dash"]["segment_durations"],
-                    timescale=self.data["dash"]["timescale"]
-                )
+                if len(self.data["dash"]["segment_durations"]) > 1:
+                    text = merge_segmented_webvtt(
+                        text,
+                        segment_durations=self.data["dash"]["segment_durations"],
+                        timescale=self.data["dash"]["timescale"]
+                    )
             elif self.descriptor == Track.Descriptor.HLS:
-                text = merge_segmented_webvtt(
-                    text,
-                    segment_durations=self.data["hls"]["segment_durations"],
-                    timescale=1 # ?
-                )
+                if len(self.data["hls"]["segment_durations"]) > 1:
+                    text = merge_segmented_webvtt(
+                        text,
+                        segment_durations=self.data["hls"]["segment_durations"],
+                        timescale=1 # ?
+                    )
             caption_set = pycaption.WebVTTReader().read(text)
             Subtitle.merge_same_cues(caption_set)
             subtitle_text = pycaption.WebVTTWriter().write(caption_set)


@@ -4,6 +4,7 @@ import logging
 import re
 import shutil
 import subprocess
+from collections import defaultdict
 from copy import copy
 from enum import Enum
 from functools import partial
@@ -42,7 +43,7 @@ class Track:
         drm: Optional[Iterable[DRM_T]] = None,
         edition: Optional[str] = None,
         downloader: Optional[Callable] = None,
-        data: Optional[dict] = None,
+        data: Optional[Union[dict, defaultdict]] = None,
         id_: Optional[str] = None,
     ) -> None:
         if not isinstance(url, (str, list)):
@@ -63,8 +64,8 @@
             raise TypeError(f"Expected edition to be a {str}, not {type(edition)}")
         if not isinstance(downloader, (Callable, type(None))):
             raise TypeError(f"Expected downloader to be a {Callable}, not {type(downloader)}")
-        if not isinstance(data, (dict, type(None))):
-            raise TypeError(f"Expected data to be a {dict}, not {type(data)}")
+        if not isinstance(data, (dict, defaultdict, type(None))):
+            raise TypeError(f"Expected data to be a {dict} or {defaultdict}, not {type(data)}")
         invalid_urls = ", ".join(set(type(x) for x in url if not isinstance(x, str)))
         if invalid_urls:
@@ -93,6 +94,7 @@
         self.drm = drm
         self.edition: str = edition
         self.downloader = downloader
+        self._data: defaultdict[Any, Any] = defaultdict(dict)
         self.data = data or {}
         if self.name is None:
@@ -132,6 +134,42 @@
     def __eq__(self, other: Any) -> bool:
         return isinstance(other, Track) and self.id == other.id
+    @property
+    def data(self) -> defaultdict[Any, Any]:
+        """
+        Arbitrary track data dictionary.
+        A defaultdict is used with a dict as the factory for easier
+        nested saving and safer exists-checks.
+        Reserved keys:
+        - "hls" used by the HLS class.
+          - playlist: m3u8.model.Playlist - The primary track information.
+          - media: m3u8.model.Media - The audio/subtitle track information.
+          - segment_durations: list[int] - A list of each segment's duration.
+        - "dash" used by the DASH class.
+          - manifest: lxml.ElementTree - DASH MPD manifest.
+          - period: lxml.Element - The period of this track.
+          - adaptation_set: lxml.Element - The adaptation set of this track.
+          - representation: lxml.Element - The representation of this track.
+          - timescale: int - The timescale of the track's segments.
+          - segment_durations: list[int] - A list of each segment's duration.
+        You should not add, change, or remove any data within reserved keys.
+        You may use their data but do note that the values of them may change
+        or be removed at any point.
+        """
+        return self._data
+    @data.setter
+    def data(self, value: Union[dict, defaultdict]) -> None:
+        if not isinstance(value, (dict, defaultdict)):
+            raise TypeError(f"Expected data to be a {dict} or {defaultdict}, not {type(value)}")
+        if isinstance(value, dict):
+            value = defaultdict(dict, **value)
+        self._data = value
     def download(
         self,
         session: Session,
@@ -504,6 +542,7 @@
             else:
                 raise
+        original_path.unlink()
         self.path = output_path


@@ -316,7 +316,7 @@ class Tracks:
         ][:per_language or None])
         return selected
-    def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int]:
+    def mux(self, title: str, delete: bool = True, progress: Optional[partial] = None) -> tuple[Path, int, list[str]]:
         """
         Multiplex all the Tracks into a Matroska Container file.
@@ -410,15 +410,18 @@
         # let potential failures go to caller, caller should handle
         try:
+            errors = []
             p = subprocess.Popen([
                 *cl,
                 "--output", str(output_path),
                 "--gui-mode"
            ], text=True, stdout=subprocess.PIPE)
             for line in iter(p.stdout.readline, ""):
+                if line.startswith("#GUI#error") or line.startswith("#GUI#warning"):
+                    errors.append(line)
                 if "progress" in line:
                     progress(total=100, completed=int(line.strip()[14:-1]))
-            return output_path, p.wait()
+            return output_path, p.wait(), errors
         finally:
             if chapters_path:
                 # regardless of delete param, we delete as it's a file we made during muxing


@@ -123,18 +123,18 @@ def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
     # since it doesn't care what child box the wanted box is from, this works fine.
     if not isinstance(data, (bytes, bytearray)):
         raise ValueError("data must be bytes")
+    offset = 0
     while True:
         try:
-            index = data.index(box_type)
+            index = data[offset:].index(box_type)
         except ValueError:
             break
         if index < 0:
             break
-        if index > 4:
-            index -= 4 # size is before box type and is 4 bytes long
-            data = data[index:]
+        index -= 4 # size is before box type and is 4 bytes long
         try:
-            box = Box.parse(data)
+            box = Box.parse(data[offset:][index:])
         except IOError:
             # since get_init_segment might cut off unexpectedly, pymp4 may be unable to read
             # the expected amounts of data and complain, so let's just end the function here
@@ -147,6 +147,7 @@ def get_boxes(data: bytes, box_type: bytes, as_bytes: bool = False) -> Box:
             raise e
         if as_bytes:
             box = Box.build(box)
+        offset += index + len(Box.build(box))
         yield box