From f25d2419cf60b537aff1ce7a67b78603b749f74a Mon Sep 17 00:00:00 2001 From: rlaphoenix Date: Mon, 1 Apr 2024 14:59:53 +0100 Subject: [PATCH] fix(curl-impersonate): Set Cert-Authority Bundle for HTTPS Proxies For some reason curl-impersonate (curl_cffi project) does not set the certificate-authority bundle for proxies, which to be fair is for some reason separated into two curl-options. Doing this change as well as removing the https->http scheme enforcement on proxies, fixes HTTPS proxies on the curl-impersonate downloaders. I also simplified the separate http and https proxy definitions to the `all` definition which was not originally supported but does seem to be supported as of v0.6.2. I tested this on NordVPN proxies which are explicitly HTTPS-only and it does work. --- devine/core/downloaders/curl_impersonate.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/devine/core/downloaders/curl_impersonate.py b/devine/core/downloaders/curl_impersonate.py index 3bb6405..9f724e3 100644 --- a/devine/core/downloaders/curl_impersonate.py +++ b/devine/core/downloaders/curl_impersonate.py @@ -6,6 +6,7 @@ from http.cookiejar import CookieJar from pathlib import Path from typing import Any, Generator, MutableMapping, Optional, Union +from curl_cffi import CurlOpt from curl_cffi.requests import Session from rich import filesize @@ -23,7 +24,7 @@ BROWSER = config.curl_impersonate.get("browser", "chrome120") def download( url: str, save_path: Path, - session: Optional[Session] = None, + session: Session, **kwargs: Any ) -> Generator[dict[str, Any], None, None]: """ @@ -52,8 +53,10 @@ def download( for one-time request changes like a header, cookie, or proxy. For example, to request Byte-ranges use e.g., `headers={"Range": "bytes=0-128"}`. 
""" - if not session: - session = Session(impersonate=BROWSER) + # https://github.com/yifeikong/curl_cffi/issues/6#issuecomment-2028518677 + # must be applied here since the `session.curl` is thread-localized + # noinspection PyProtectedMember + session.curl.setopt(CurlOpt.PROXY_CAINFO, session.curl._cacert) save_dir = save_path.parent control_file = save_path.with_name(f"{save_path.name}.!dev") @@ -224,10 +227,7 @@ def curl_impersonate( if cookies: session.cookies.update(cookies) if proxy: - session.proxies.update({ - "http": proxy.replace("https://", "http://"), - "https": proxy.replace("https://", "http://") - }) + session.proxies.update({"all": proxy}) yield dict(total=len(urls))