youtube-summarizer/venv311/lib/python3.11/site-packages/youtube_transcript_api/proxies.py

183 lines
7.2 KiB
Python

from abc import ABC, abstractmethod
from typing import TypedDict, Optional, List
from urllib.parse import quote
class InvalidProxyConfig(Exception):
    """Raised when a proxy config is created from settings that cannot be used."""
class RequestsProxyConfigDict(TypedDict):
    """
    Shape of the proxies mapping that is handed to the requests library. See the
    official requests documentation for details on how this mapping is used:
    https://requests.readthedocs.io/en/latest/user/advanced/#proxies
    """

    # Proxy URL for requests made over plain HTTP.
    http: str
    # Proxy URL for requests made over HTTPS.
    https: str
class ProxyConfig(ABC):
    """
    Abstract base of every proxy config. Any object qualifies as a proxy config, as
    long as `to_requests_dict` can turn it into a `RequestsProxyConfigDict`.
    """

    @property
    def retries_when_blocked(self) -> int:
        """
        Number of retries to attempt after a request has been blocked. With rotating
        residential proxies backed by a large IP pool, a few retries can be
        worthwhile: every retry triggers an IP rotation, and the next IP might not
        be blocked. The base implementation does not retry.
        """
        return 0

    @property
    def prevent_keeping_connections_alive(self) -> bool:
        """
        Whether the HTTP client should be stopped from keeping TCP connections
        alive. This is useful for rotating proxies, because the IP is not rotated
        on every request while a connection stays open. The base implementation
        does not prevent keep-alive.
        """
        return False

    @abstractmethod
    def to_requests_dict(self) -> RequestsProxyConfigDict:
        """
        Convert this proxy config into the Dict the requests library expects.
        The official requests documentation describes that format in more detail:
        https://requests.readthedocs.io/en/latest/user/advanced/#proxies
        """
class GenericProxyConfig(ProxyConfig):
    """
    Proxy config for any generic HTTP/HTTPS/SOCKS proxy. As the requests library
    is used under the hood, the requests documentation explains in more detail how
    proxies can be set up:
    https://requests.readthedocs.io/en/latest/user/advanced/#proxies

    At least one of the two proxy URLs has to be provided. If only one of them is
    given, it is used for both HTTP and HTTPS connections.
    """

    def __init__(self, http_url: Optional[str] = None, https_url: Optional[str] = None):
        """
        At least one of the two proxy URLs has to be provided. If only one of them
        is given, it is used for both HTTP and HTTPS connections.

        :param http_url: the proxy URL used for HTTP requests. Defaults to
            `https_url` if None.
        :param https_url: the proxy URL used for HTTPS requests. Defaults to
            `http_url` if None.
        :raises InvalidProxyConfig: if neither `http_url` nor `https_url` is set.
        """
        if not (http_url or https_url):
            raise InvalidProxyConfig(
                "GenericProxyConfig requires you to define at least one of the two: "
                "http or https"
            )

        self.http_url = http_url
        self.https_url = https_url

    def to_requests_dict(self) -> RequestsProxyConfigDict:
        """
        Build the proxies Dict for the requests library. Each scheme falls back to
        the URL of the other scheme if its own URL is not set.
        """
        http_proxy = self.http_url or self.https_url
        https_proxy = self.https_url or self.http_url
        return {"http": http_proxy, "https": https_proxy}
class WebshareProxyConfig(GenericProxyConfig):
    """
    Webshare is a provider offering rotating residential proxies, which is the
    most reliable way to work around being blocked by YouTube.

    If you don't have a Webshare account yet, you will have to create one
    at https://www.webshare.io/?referral_code=w0xno53eb50g and purchase a "Residential"
    proxy package that suits your workload, to be able to use this proxy config (make
    sure NOT to purchase "Proxy Server" or "Static Residential"!).

    Once you have created an account you only need the "Proxy Username" and
    "Proxy Password" that you can find in your Webshare settings
    at https://dashboard.webshare.io/proxy/settings to set up this config class, which
    will take care of setting up your proxies as needed, by defaulting to rotating
    proxies.

    Note that referral links are used here and any purchases made through these links
    will support this Open Source project, which is very much appreciated! :)
    However, you can of course integrate your own proxy solution by using the
    `GenericProxyConfig` class, if that's what you prefer.
    """

    DEFAULT_DOMAIN_NAME = "p.webshare.io"
    DEFAULT_PORT = 80

    def __init__(
        self,
        proxy_username: str,
        proxy_password: str,
        filter_ip_locations: Optional[List[str]] = None,
        retries_when_blocked: int = 10,
        domain_name: str = DEFAULT_DOMAIN_NAME,
        proxy_port: int = DEFAULT_PORT,
    ):
        """
        Once you have created a Webshare account at
        https://www.webshare.io/?referral_code=w0xno53eb50g and purchased a
        "Residential" package (make sure NOT to purchase "Proxy Server" or
        "Static Residential"!), this config class allows you to easily use it,
        by defaulting to the most reliable proxy settings (rotating residential
        proxies).

        :param proxy_username: "Proxy Username" found at
            https://dashboard.webshare.io/proxy/settings
        :param proxy_password: "Proxy Password" found at
            https://dashboard.webshare.io/proxy/settings
        :param filter_ip_locations: If you want to limit the pool of IPs that you will
            be rotating through to those located in specific countries, you can provide
            a list of location codes here. By choosing locations that are close to the
            machine that is running this code, you can reduce latency. Also, this can
            be used to work around location-based restrictions.
            You can find the full list of available locations (and how many IPs are
            available in each location) at
            https://www.webshare.io/features/proxy-locations?referral_code=w0xno53eb50g
        :param retries_when_blocked: Define how many times we should retry if a request
            is blocked. When using rotating residential proxies with a large IP pool it
            makes sense to retry a couple of times when a blocked IP is encountered,
            since a retry will trigger an IP rotation and the next IP might not be
            blocked. Defaults to 10.
        :param domain_name: host name of the proxy endpoint used to build the proxy
            URL. Defaults to `DEFAULT_DOMAIN_NAME`.
        :param proxy_port: port of the proxy endpoint used to build the proxy URL.
            Defaults to `DEFAULT_PORT`.
        """
        # GenericProxyConfig.__init__ is intentionally not called: it assigns
        # `self.http_url` / `self.https_url`, which are read-only properties on this
        # class and are derived from the credentials stored below.
        self.proxy_username = proxy_username
        self.proxy_password = proxy_password
        self.domain_name = domain_name
        self.proxy_port = proxy_port
        self._filter_ip_locations = filter_ip_locations or []
        self._retries_when_blocked = retries_when_blocked

    @property
    def url(self) -> str:
        """
        The proxy URL built from the configured credentials and endpoint. The user
        part is `<proxy_username>[-<LOCATION>...]-rotate`, where each location code
        from `filter_ip_locations` is upper-cased.

        Username and password are percent-encoded, so credentials containing
        characters that are reserved in URLs (e.g. `@`, `:`, `/`, `#`, `?`) do not
        corrupt the URL. Credentials made up of letters, digits and `-._~` only are
        left unchanged by the encoding.
        """
        location_codes = "".join(
            f"-{location_code.upper()}" for location_code in self._filter_ip_locations
        )
        # NOTE: this relies on the HTTP client percent-decoding the userinfo part of
        # the proxy URL before using it for authentication. `str()` keeps non-str
        # credentials working, which the plain f-string interpolation accepted too.
        username = quote(f"{self.proxy_username}{location_codes}-rotate", safe="")
        password = quote(str(self.proxy_password), safe="")
        return f"http://{username}:{password}@{self.domain_name}:{self.proxy_port}/"

    @property
    def http_url(self) -> str:
        """The proxy URL used for HTTP requests (same as `url`)."""
        return self.url

    @property
    def https_url(self) -> str:
        """The proxy URL used for HTTPS requests (same as `url`)."""
        return self.url

    @property
    def prevent_keeping_connections_alive(self) -> bool:
        """
        Always True, since this config defaults to rotating proxies and the IP
        won't be rotated on every request if the connection stays open.
        """
        return True

    @property
    def retries_when_blocked(self) -> int:
        """The number of retries passed to the constructor (10 by default)."""
        return self._retries_when_blocked