mirror of https://github.com/devine-dl/devine.git
HEAD requests were made to sum the total file size of the download operation. However, the downloader may be used on URLs where the content is not segmented media, so the server may not support or respond with the Content-Length header, which caused the requests downloader to crash before it even got a chance to begin downloading. This total size value isn't really necessary anyway, and computing it could issue hundreds of HEAD requests in quick succession on segmented sources, adding up-front delay before the download actually starts.
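For context, a minimal sketch of the two approaches (hypothetical helper names, not code from this repository): summing sizes up-front costs one HEAD round-trip per URL and trusts every server to send Content-Length, while reading the header off each GET response costs nothing extra and degrades gracefully when it is absent.

# Hypothetical helpers for illustration only; not devine's code.
from typing import Optional

from requests import Session


def total_via_head(session: Session, urls: list[str]) -> int:
    # One extra round-trip per URL before any download begins; raises
    # KeyError as soon as one server omits the Content-Length header.
    return sum(
        int(session.head(url).headers["Content-Length"])
        for url in urls
    )


def size_from_get(session: Session, url: str) -> Optional[int]:
    # No extra requests: read the header off the streaming GET response
    # itself, and tolerate servers that do not declare a size.
    response = session.get(url, stream=True)
    content_length = response.headers.get("Content-Length")
    return int(content_length) if content_length is not None else None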
88 lines · 2.7 KiB · Python
import time
from functools import partial
from pathlib import Path
from typing import Any, Optional, Union

from requests import Session
from rich import filesize
from rich.filesize import decimal

def requests(
    uri: Union[str, list[str]],
    out: Path,
    headers: Optional[dict] = None,
    proxy: Optional[str] = None,
    progress: Optional[partial] = None,
    *_: Any,
    **__: Any
) -> int:
    """
    Download files using Python Requests.
    https://requests.readthedocs.io

    If multiple URLs are provided they will be downloaded in the provided order
    to the output directory. They will not be merged together.
    """
    if isinstance(uri, list) and len(uri) == 1:
        uri = uri[0]

    if isinstance(uri, list):
        if out.is_file():
            raise ValueError("Expecting out to be a Directory path not a File as multiple URLs were provided")
        # Save each URL as a zero-padded segment file within the output directory.
        uri = [
            (url, out / f"{i:08}.mp4")
            for i, url in enumerate(uri)
        ]
    else:
        uri = [(uri, out.parent / out.name)]

    session = Session()
    if headers:
        # Strip Accept-Encoding so responses are not compressed; otherwise the
        # bytes written to disk would not match the Content-Length header.
        headers = {
            k: v
            for k, v in headers.items()
            if k.lower() != "accept-encoding"
        }
        session.headers.update(headers)
    if proxy:
        session.proxies.update({"all": proxy})

    if progress:
        progress(total=len(uri))

    download_sizes = []
    last_speed_refresh = time.time()

    for url, out_path in uri:
        out_path.parent.mkdir(parents=True, exist_ok=True)

        stream = session.get(url, stream=True)
        # Content-Length is optional (e.g., chunked transfer encoding omits it),
        # so fall back to 0 rather than crashing on a missing header.
        file_size = int(stream.headers.get("Content-Length", 0))

        with open(out_path, "wb") as f:
            written = 0
            for chunk in stream.iter_content(chunk_size=1024):
                download_size = len(chunk)
                f.write(chunk)
                written += download_size
                if progress:
                    progress(advance=1)

                    now = time.time()
                    time_since = now - last_speed_refresh

                    download_sizes.append(download_size)
                    # Refresh the speed readout every 5 seconds, or on a short
                    # (final) chunk so the last reading is not left stale.
                    if time_since > 5 or download_size < 1024:
                        data_size = sum(download_sizes)
                        download_speed = data_size / (time_since or 1)
                        progress(downloaded=f"{filesize.decimal(download_speed)}/s")
                        last_speed_refresh = now
                        download_sizes.clear()

        # Only validate the byte count when the server actually declared a size.
        if file_size and written < file_size:
            raise ValueError(
                f"{url} finished downloading unexpectedly, got {decimal(written)}/{decimal(file_size)}")

    return 0


__all__ = ("requests",)
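For illustration, a possible way to drive this downloader with a rich progress bar. The wiring below is an assumption for the sketch, not necessarily how devine invokes it: the progress callable receives total=, advance=, and a custom downloaded= field, which fits rich's Progress.update(task_id, ...) pre-bound with functools.partial.

# Usage sketch (assumed wiring): pre-bind a rich progress task so the
# downloader's progress(total=...) / progress(advance=...) calls update it.
from functools import partial
from pathlib import Path

from rich.progress import Progress

with Progress() as rich_progress:
    task_id = rich_progress.add_task("Downloading")
    requests(  # the downloader function defined above
        uri="https://example.com/video.mp4",  # hypothetical URL
        out=Path("downloads") / "video.mp4",
        headers={"User-Agent": "Mozilla/5.0"},
        progress=partial(rich_progress.update, task_id),
    )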