"""
Proxy configuration classes that produce the proxy mapping expected by the
requests library.
"""

from abc import ABC, abstractmethod
from typing import TypedDict, Optional, List


class InvalidProxyConfig(Exception):
    """Raised when a proxy config is constructed with invalid or missing settings."""


class RequestsProxyConfigDict(TypedDict):
    """
    This type represents the Dict that is used by the requests library to configure
    the proxies used. More information on this can be found in the official requests
    documentation: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
    """

    # Proxy URL used for plain HTTP requests.
    http: str
    # Proxy URL used for HTTPS requests.
    https: str


class ProxyConfig(ABC):
    """
    The base class for all proxy configs. Anything can be a proxy config, as long as
    it can be turned into a `RequestsProxyConfigDict` by calling `to_requests_dict`.
    """

    @abstractmethod
    def to_requests_dict(self) -> RequestsProxyConfigDict:
        """
        Turns this proxy config into the Dict that is expected by the requests library.
        More information on this can be found in the official requests documentation:
        https://requests.readthedocs.io/en/latest/user/advanced/#proxies

        :return: a Dict with the keys "http" and "https", each holding a proxy URL.
        """
        pass

    @property
    def prevent_keeping_connections_alive(self) -> bool:
        """
        If you are using rotating proxies, it can be useful to prevent the HTTP
        client from keeping TCP connections alive, as your IP won't be rotated on
        every request, if your connection stays open.

        :return: `False` by default; subclasses override this to opt in.
        """
        return False

    @property
    def retries_when_blocked(self) -> int:
        """
        Defines how many times we should retry if a request is blocked. When using
        rotating residential proxies with a large IP pool it can make sense to retry a
        couple of times when a blocked IP is encountered, since a retry will trigger
        an IP rotation and the next IP might not be blocked.

        :return: `0` by default (no retries); subclasses override this to opt in.
        """
        return 0


class GenericProxyConfig(ProxyConfig):
    """
    This proxy config can be used to set up any generic HTTP/HTTPS/SOCKS proxy. As
    the requests library is used under the hood, you can follow the requests
    documentation to get more detailed information on how to set up proxies:
    https://requests.readthedocs.io/en/latest/user/advanced/#proxies

    If only an HTTP or an HTTPS proxy is provided, it will be used for both types of
    connections. However, you will have to provide at least one of the two.
    """

    def __init__(self, http_url: Optional[str] = None, https_url: Optional[str] = None):
        """
        If only an HTTP or an HTTPS proxy is provided, it will be used for both types of
        connections. However, you will have to provide at least one of the two.

        :param http_url: the proxy URL used for HTTP requests. Defaults to `https_url`
            if None.
        :param https_url: the proxy URL used for HTTPS requests. Defaults to `http_url`
            if None.
        :raises InvalidProxyConfig: if both `http_url` and `https_url` are missing
            (None or an empty string).
        """
        # Truthiness check on purpose: an empty string is as unusable as None.
        if not http_url and not https_url:
            raise InvalidProxyConfig(
                "GenericProxyConfig requires you to define at least one of the two: "
                "http or https"
            )
        self.http_url = http_url
        self.https_url = https_url

    def to_requests_dict(self) -> RequestsProxyConfigDict:
        """
        Builds the proxies Dict for the requests library, letting each scheme fall
        back to the other scheme's URL when its own URL was not provided.

        :return: a Dict with the keys "http" and "https".
        """
        return {
            "http": self.http_url or self.https_url,
            "https": self.https_url or self.http_url,
        }


class WebshareProxyConfig(GenericProxyConfig):
    """
    Webshare is a provider offering rotating residential proxies, which is the
    most reliable way to work around being blocked by YouTube.

    If you don't have a Webshare account yet, you will have to create one
    at https://www.webshare.io/?referral_code=w0xno53eb50g and purchase a "Residential"
    proxy package that suits your workload, to be able to use this proxy config (make
    sure NOT to purchase "Proxy Server" or "Static Residential"!).

    Once you have created an account you only need the "Proxy Username" and
    "Proxy Password" that you can find in your Webshare settings
    at https://dashboard.webshare.io/proxy/settings to set up this config class, which
    will take care of setting up your proxies as needed, by defaulting to rotating
    proxies.

    Note that referral links are used here and any purchases made through these links
    will support this Open Source project, which is very much appreciated! :)
    However, you can of course integrate your own proxy solution by using the
    `GenericProxyConfig` class, if that's what you prefer.
    """

    # Default host and port of the proxy endpoint that is put into the proxy URL.
    DEFAULT_DOMAIN_NAME = "p.webshare.io"
    DEFAULT_PORT = 80

    def __init__(
        self,
        proxy_username: str,
        proxy_password: str,
        filter_ip_locations: Optional[List[str]] = None,
        retries_when_blocked: int = 10,
        domain_name: str = DEFAULT_DOMAIN_NAME,
        proxy_port: int = DEFAULT_PORT,
    ):
        """
        Once you have created a Webshare account at
        https://www.webshare.io/?referral_code=w0xno53eb50g and purchased a
        "Residential" package (make sure NOT to purchase "Proxy Server" or
        "Static Residential"!), this config class allows you to easily use it,
        by defaulting to the most reliable proxy settings (rotating residential
        proxies).

        :param proxy_username: "Proxy Username" found at
            https://dashboard.webshare.io/proxy/settings
        :param proxy_password: "Proxy Password" found at
            https://dashboard.webshare.io/proxy/settings
        :param filter_ip_locations: If you want to limit the pool of IPs that you will
            be rotating through to those located in specific countries, you can provide
            a list of location codes here. By choosing locations that are close to the
            machine that is running this code, you can reduce latency. Also, this can
            be used to work around location-based restrictions.
            You can find the full list of available locations (and how many IPs are
            available in each location) at
            https://www.webshare.io/features/proxy-locations?referral_code=w0xno53eb50g
            A single location code passed as a plain string is treated as a list
            containing just that code.
        :param retries_when_blocked: Define how many times we should retry if a request
            is blocked. When using rotating residential proxies with a large IP pool it
            makes sense to retry a couple of times when a blocked IP is encountered,
            since a retry will trigger an IP rotation and the next IP might not be
            blocked. Defaults to 10.
        :param domain_name: the host name of the proxy endpoint that is put into the
            proxy URL. Defaults to `DEFAULT_DOMAIN_NAME`.
        :param proxy_port: the port of the proxy endpoint that is put into the proxy
            URL. Defaults to `DEFAULT_PORT`.
        """
        # NOTE: `GenericProxyConfig.__init__` is intentionally not called. It assigns
        # `self.http_url` and `self.https_url`, which are read-only properties on this
        # class, so that assignment would fail.

        # A bare string would otherwise be iterated character by character when the
        # URL is built ("us" -> "-U-S"), so wrap it into a one-element list.
        if isinstance(filter_ip_locations, str):
            filter_ip_locations = [filter_ip_locations]

        self.proxy_username = proxy_username
        self.proxy_password = proxy_password
        self.domain_name = domain_name
        self.proxy_port = proxy_port
        self._filter_ip_locations = filter_ip_locations or []
        self._retries_when_blocked = retries_when_blocked

    @property
    def url(self) -> str:
        """
        The proxy URL built from the configured credentials, location filter, domain
        name and port. It is used for both HTTP and HTTPS requests.

        The username and password are inserted verbatim (no percent-encoding).
        """
        # Each location code is upper-cased and appended to the username as "-CODE".
        location_codes = "".join(
            f"-{location_code.upper()}" for location_code in self._filter_ip_locations
        )
        # The username always gets the "-rotate" suffix (presumably what selects the
        # rotating mode on Webshare's side -- confirm against the Webshare docs).
        return (
            f"http://{self.proxy_username}{location_codes}-rotate:{self.proxy_password}"
            f"@{self.domain_name}:{self.proxy_port}/"
        )

    @property
    def http_url(self) -> str:
        """The proxy URL used for HTTP requests (identical to `url`)."""
        return self.url

    @property
    def https_url(self) -> str:
        """The proxy URL used for HTTPS requests (identical to `url`)."""
        return self.url

    @property
    def prevent_keeping_connections_alive(self) -> bool:
        """
        Always `True` for this config: as the base class documents, an open
        connection keeps the IP from being rotated on every request.
        """
        return True

    @property
    def retries_when_blocked(self) -> int:
        """The number of retries on blocked requests, as passed to `__init__`."""
        return self._retries_when_blocked