Skip to content

Handle proxy variables #2789

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
# Changelog

## Unreleased

### Changed

This release is API compatible but contains a major internal change in the HTTP handling.

* ngclient: urllib3 is used as the HTTP library by default instead of requests (#2762,
#2773, #2789)
* This removes dependencies on `requests`, `idna`, `charset-normalizer` and `certifi`
* The deprecated RequestsFetcher implementation is available but requires selecting
the fetcher at Updater initialization and explicitly depending on requests
* ngclient: TLS certificate source was changed. Certificates now come from operating
system certificate store instead of `certifi` (#2762)
* Test infrastructure has improved and should now be more usable externally, e.g. in
distro test suites (#2749)

## v5.1.0

### Changed
Expand Down
217 changes: 217 additions & 0 deletions tests/test_proxy_environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
# Copyright 2025, the TUF contributors
# SPDX-License-Identifier: MIT OR Apache-2.0

"""Test ngclient ProxyEnvironment"""

from __future__ import annotations

import sys
import unittest
from unittest.mock import Mock, patch

from urllib3 import PoolManager, ProxyManager

from tests import utils
from tuf.ngclient._internal.proxy import ProxyEnvironment


class TestProxyEnvironment(unittest.TestCase):
    """Exercise the proxy selection logic of ngclient ProxyEnvironment

    Every test asks ProxyEnvironment.get_pool_manager() for managers for a
    series of scheme/host pairs and then inspects the keys of the
    ProxyEnvironment._pool_managers dict to decide whether the right managers
    were created (the key is the proxy url, or None for "no proxy").

    The proxy configuration is controlled by mocking getproxies(): this is a
    urllib function that returns a dict with the proxy environment variable
    contents.

    Testing ProxyEnvironment.request() would possibly be better but far more
    difficult: with the current approach none of the proxies has to actually
    exist.
    """

    def _get_managers(
        self, env: ProxyEnvironment, *targets: tuple[str, str | None]
    ) -> None:
        # Request a pool manager for every (scheme, host) pair, in the given
        # order: the order matters as it defines the order of the dict keys
        for scheme, host in targets:
            env.get_pool_manager(scheme, host)

    def assert_pool_managers(
        self, env: ProxyEnvironment, expected: list[str | None]
    ) -> None:
        managers = env._pool_managers

        # The proxy urls (dict keys) are the expected ones, in creation order
        self.assertEqual(list(managers.keys()), expected)

        # Everything is a PoolManager; entries with a proxy url must
        # additionally be ProxyManagers
        for proxy_url, pool_manager in managers.items():
            self.assertIsInstance(pool_manager, PoolManager)
            if proxy_url is None:
                continue
            self.assertIsInstance(pool_manager, ProxyManager)

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_no_variables(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {}

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("http", "example.com"),
            ("https", "example.com"),
            ("https", "example.com"),
            ("https", "subdomain.example.com"),
            ("https", "differentsite.com"),
        )

        # Only the plain pool manager exists (no proxies)
        self.assert_pool_managers(proxy_env, [None])

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_proxy_set(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "https": "http://localhost:8888",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("http", "example.com"),
            ("https", "example.com"),
            ("https", "example.com"),
            ("https", "differentsite.com"),
        )

        # Two pool managers: a plain poolmanager (http) and the https
        # proxymanager
        self.assert_pool_managers(proxy_env, [None, "http://localhost:8888"])

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_proxies_set(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "http": "http://localhost:8888",
            "https": "http://localhost:9999",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("http", "example.com"),
            ("https", "example.com"),
            ("https", "example.com"),
            ("https", "subdomain.example.com"),
            ("https", "differentsite.com"),
        )

        # Two pool managers: a http proxymanager and a https proxymanager
        self.assert_pool_managers(
            proxy_env, ["http://localhost:8888", "http://localhost:9999"]
        )

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_no_proxy_set(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "http": "http://localhost:8888",
            "https": "http://localhost:9999",
            "no": "somesite.com, example.com, another.site.com",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("http", "example.com"),
            ("https", "example.com"),
            ("https", "example.com"),
        )

        # Only the plain pool manager exists so far (no proxies)
        self.assert_pool_managers(proxy_env, [None])

        self._get_managers(
            proxy_env,
            ("http", "differentsite.com"),
            ("https", "differentsite.com"),
        )

        # Three pool managers: plain poolmanager for no_proxy domains,
        # http proxymanager and https proxymanager
        self.assert_pool_managers(
            proxy_env,
            [None, "http://localhost:8888", "http://localhost:9999"],
        )

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_no_proxy_subdomain_match(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "https": "http://localhost:9999",
            "no": "somesite.com, example.com, another.site.com",
        }

        proxy_env = ProxyEnvironment()

        # A subdomain is expected to match example.com in no_proxy
        self._get_managers(proxy_env, ("https", "subdomain.example.com"))

        # Only the plain pool manager exists so far (no proxies)
        self.assert_pool_managers(proxy_env, [None])

        # A host that merely ends with the same characters must not match
        # example.com in no_proxy
        self._get_managers(proxy_env, ("https", "xexample.com"))

        # Two pool managers: plain poolmanager for no_proxy domains and a
        # https proxymanager
        self.assert_pool_managers(proxy_env, [None, "http://localhost:9999"])

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_no_proxy_wildcard(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "https": "http://localhost:8888",
            "no": "*",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("https", "example.com"),
            ("https", "differentsite.com"),
            ("https", "subdomain.example.com"),
        )

        # Only the plain pool manager exists, no proxies
        self.assert_pool_managers(proxy_env, [None])

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_no_proxy_leading_dot(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "https": "http://localhost:8888",
            "no": ".example.com",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("https", "example.com"),
            ("https", "subdomain.example.com"),
        )

        # Only the plain pool manager exists, no proxies
        self.assert_pool_managers(proxy_env, [None])

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_all_proxy_set(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "all": "http://localhost:8888",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("http", "example.com"),
            ("https", "example.com"),
            ("https", "example.com"),
            ("https", "subdomain.example.com"),
            ("https", "differentsite.com"),
        )

        # Only a single proxy manager exists
        self.assert_pool_managers(proxy_env, ["http://localhost:8888"])

        # urllib3 currently only handles http and https, but let's test a
        # host-less scheme anyway
        self._get_managers(proxy_env, ("file", None))

        # The proxy manager and a plain pool manager
        self.assert_pool_managers(proxy_env, ["http://localhost:8888", None])

    @patch("tuf.ngclient._internal.proxy.getproxies")
    def test_all_proxy_and_no_proxy_set(self, mock_getproxies: Mock) -> None:
        mock_getproxies.return_value = {
            "all": "http://localhost:8888",
            "no": "somesite.com, example.com, another.site.com",
        }

        proxy_env = ProxyEnvironment()
        self._get_managers(
            proxy_env,
            ("http", "example.com"),
            ("https", "example.com"),
            ("https", "example.com"),
            ("https", "subdomain.example.com"),
        )

        # Only the plain pool manager exists so far (no proxies)
        self.assert_pool_managers(proxy_env, [None])

        self._get_managers(
            proxy_env,
            ("http", "differentsite.com"),
            ("https", "differentsite.com"),
        )

        # Two pool managers: plain poolmanager for no_proxy domains and one
        # proxymanager
        self.assert_pool_managers(proxy_env, [None, "http://localhost:8888"])


if __name__ == "__main__":
    # Configure test logging from the command line arguments before handing
    # control over to the unittest runner
    utils.configure_test_logging(sys.argv)
    unittest.main()
12 changes: 8 additions & 4 deletions tests/test_updater_ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,10 @@ def test_non_existing_target_file(self) -> None:
def test_user_agent(self) -> None:
# test default
self.updater.refresh()
session = self.updater._fetcher._poolManager
ua = session.headers["User-Agent"]
poolmgr = self.updater._fetcher._proxy_env.get_pool_manager(
"http", "localhost"
)
ua = poolmgr.headers["User-Agent"]
self.assertEqual(ua[:11], "python-tuf/")

# test custom UA
Expand All @@ -343,8 +345,10 @@ def test_user_agent(self) -> None:
config=UpdaterConfig(app_user_agent="MyApp/1.2.3"),
)
updater.refresh()
session = updater._fetcher._poolManager
ua = session.headers["User-Agent"]
poolmgr = updater._fetcher._proxy_env.get_pool_manager(
"http", "localhost"
)
ua = poolmgr.headers["User-Agent"]

self.assertEqual(ua[:23], "MyApp/1.2.3 python-tuf/")

Expand Down
101 changes: 101 additions & 0 deletions tuf/ngclient/_internal/proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright New York University and the TUF contributors
# SPDX-License-Identifier: MIT OR Apache-2.0

"""Proxy environment variable handling with Urllib3"""

from __future__ import annotations

from typing import Any
from urllib.request import getproxies
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also implement this function ourselves, but I tried to keep this minimal.


from urllib3 import BaseHTTPResponse, PoolManager, ProxyManager
from urllib3.util.url import parse_url


# TODO: ProxyEnvironment could implement the whole PoolManager.RequestMethods
# Mixin: We only need request() so nothing else is currently implemented
class ProxyEnvironment:
    """A PoolManager manager for automatic proxy handling based on env variables

    Keeps track of PoolManagers for different proxy urls based on proxy
    environment variables. Use `get_pool_manager()` or `request()` to access
    the right manager for a scheme/host.

    Supports '*_proxy' variables, with special handling for 'no_proxy' and
    'all_proxy'. Host names are compared against 'no_proxy' entries
    case-insensitively.
    """

    def __init__(
        self,
        **kw_args: Any,  # noqa: ANN401
    ) -> None:
        """Read the proxy configuration once, at construction time.

        Args:
            **kw_args: Keyword arguments that are passed on unchanged to every
                PoolManager and ProxyManager created by this object.
        """
        # Managers are created lazily and keyed by proxy url (None == no proxy)
        self._pool_managers: dict[str | None, PoolManager] = {}
        self._kw_args = kw_args

        # "all" and "no" are not schemes: take them out of the scheme mapping
        self._proxies = getproxies()
        self._all_proxy = self._proxies.pop("all", None)
        no_proxy = self._proxies.pop("no", None)

        self._no_proxy_hosts: list[str] = []
        if no_proxy is not None:
            # Split by comma, remove leading periods and normalize the case
            # (host names are case-insensitive). Entries are filtered *after*
            # normalization so that an entry like "." does not turn into an
            # empty string that would match every host with a trailing dot.
            for entry in no_proxy.replace(" ", "").split(","):
                no_proxy_host = entry.lstrip(".").lower()
                if no_proxy_host:
                    self._no_proxy_hosts.append(no_proxy_host)

    def _get_proxy(self, scheme: str | None, host: str | None) -> str | None:
        """Get a proxy url for scheme and host based on proxy env variables

        Returns None when no proxy should be used: the host is missing, the
        host matches a 'no_proxy' entry, or no proxy is configured for the
        scheme (and there is no 'all_proxy' fallback).
        """

        if host is None:
            # urllib3 only handles http/https but we can do something reasonable
            # even for schemes that don't require host (like file)
            return None

        # no_proxy entries were lower-cased in __init__: compare like with like
        host = host.lower()

        # does host match any of the "no_proxy" hosts?
        for no_proxy_host in self._no_proxy_hosts:
            # wildcard match, exact hostname match, or parent domain match
            if no_proxy_host in ("*", host) or host.endswith(
                f".{no_proxy_host}"
            ):
                return None

        # A scheme specific proxy takes precedence over all_proxy
        if scheme in self._proxies:
            return self._proxies[scheme]
        if self._all_proxy is not None:
            return self._all_proxy

        return None

    def get_pool_manager(
        self, scheme: str | None, host: str | None
    ) -> PoolManager:
        """Get a poolmanager for scheme and host.

        Returns a ProxyManager if that is correct based on current proxy env
        variables, otherwise returns a PoolManager. Managers are created on
        first use and then reused for every request with the same proxy url.
        """

        proxy = self._get_proxy(scheme, host)
        if proxy not in self._pool_managers:
            if proxy is None:
                self._pool_managers[proxy] = PoolManager(**self._kw_args)
            else:
                self._pool_managers[proxy] = ProxyManager(
                    proxy,
                    **self._kw_args,
                )

        return self._pool_managers[proxy]

    def request(
        self,
        method: str,
        url: str,
        **request_kw: Any,  # noqa: ANN401
    ) -> BaseHTTPResponse:
        """Make a request using a PoolManager chosen based on url and
        proxy environment variables.

        Args:
            method: HTTP method, passed on to the manager's request().
            url: Url to request; its scheme and host select the manager.
            **request_kw: Passed on unchanged to the manager's request().
        """
        u = parse_url(url)
        manager = self.get_pool_manager(u.scheme, u.host)
        return manager.request(method, url, **request_kw)
13 changes: 13 additions & 0 deletions tuf/ngclient/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,19 @@
A simple example of using the Updater to implement a Python TUF client that
downloads target files is available in `examples/client
<https://github.com/theupdateframework/python-tuf/tree/develop/examples/client>`_.

Notes on how Updater uses HTTP by default:
* urllib3 is the HTTP library
* Typically all requests are retried by urllib3 three times (in cases where
this seems useful)
* Operating system certificate store is used for TLS, in other words
``certifi`` is not used as the certificate source
* Proxy use can be configured with ``https_proxy`` and other similar
environment variables

All of the HTTP decisions can be changed with ``fetcher`` argument:
Custom ``FetcherInterface`` implementations are possible. The alternative
``RequestsFetcher`` implementation is also provided (although deprecated).
"""

from __future__ import annotations
Expand Down
Loading