From 396ba079d60054543a9de4363555468a2af84ba4 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Thu, 13 Feb 2025 12:37:04 +0200 Subject: [PATCH 1/6] ngclient: Add proxy environment variable handling urllib3 does not handle this but we do want to support proxy users. The environment variable handling is slightly simplified from the requests implementation. Signed-off-by: Jussi Kukkonen --- tests/test_updater_ng.py | 12 +++-- tuf/ngclient/_internal/proxy.py | 96 +++++++++++++++++++++++++++++++++ tuf/ngclient/urllib3_fetcher.py | 5 +- 3 files changed, 107 insertions(+), 6 deletions(-) create mode 100644 tuf/ngclient/_internal/proxy.py diff --git a/tests/test_updater_ng.py b/tests/test_updater_ng.py index 9a89c1deea..f65dced1eb 100644 --- a/tests/test_updater_ng.py +++ b/tests/test_updater_ng.py @@ -330,8 +330,10 @@ def test_non_existing_target_file(self) -> None: def test_user_agent(self) -> None: # test default self.updater.refresh() - session = self.updater._fetcher._poolManager - ua = session.headers["User-Agent"] + poolmgr = self.updater._fetcher._proxy_env.get_pool_manager( + "http", "localhost" + ) + ua = poolmgr.headers["User-Agent"] self.assertEqual(ua[:11], "python-tuf/") # test custom UA @@ -343,8 +345,10 @@ def test_user_agent(self) -> None: config=UpdaterConfig(app_user_agent="MyApp/1.2.3"), ) updater.refresh() - session = updater._fetcher._poolManager - ua = session.headers["User-Agent"] + poolmgr = updater._fetcher._proxy_env.get_pool_manager( + "http", "localhost" + ) + ua = poolmgr.headers["User-Agent"] self.assertEqual(ua[:23], "MyApp/1.2.3 python-tuf/") diff --git a/tuf/ngclient/_internal/proxy.py b/tuf/ngclient/_internal/proxy.py new file mode 100644 index 0000000000..699ef54d3e --- /dev/null +++ b/tuf/ngclient/_internal/proxy.py @@ -0,0 +1,96 @@ +# Copyright New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Proxy environment variable handling with Urllib3""" + +from typing import Any +from urllib.request import getproxies + +from urllib3 import BaseHTTPResponse, PoolManager, ProxyManager +from urllib3.util.url import parse_url + + +# TODO: ProxyEnvironment could implement the whole PoolManager.RequestMethods +# Mixin: We only need request() so nothing else is currently implemented +class ProxyEnvironment: + """A PoolManager manager for automatic proxy handling based on env variables + + Keeps track of PoolManagers for different proxy urls based on proxy + environment variables. Use `get_pool_manager()` or `request()` to access + the right manager for a scheme/host. + + Supports '*_proxy' variables, with special handling for 'no_proxy' and + 'all_proxy'. + """ + + def __init__( + self, + **kw_args: Any, # noqa: ANN401 + ) -> None: + self._pool_managers: dict[str | None, PoolManager] = {} + self._kw_args = kw_args + + self._proxies = getproxies() + self._all_proxy = self._proxies.pop("all", None) + no_proxy = self._proxies.pop("no", None) + if no_proxy is None: + self._no_proxy_hosts = [] + else: + self._no_proxy_hosts = [ + h for h in no_proxy.replace(" ", "").split(",") if h + ] + + def _get_proxy(self, scheme: str | None, host: str | None) -> str | None: + """Get a proxy url for scheme and host based on proxy env variables""" + + if host is None: + # urllib3 only handles http/https but we can do something reasonable + # even for schemes that don't require host (like file) + return None + + # does host match "no_proxy" hosts? + for no_proxy_host in self._no_proxy_hosts: + # exact hostname match or parent domain match + if host == no_proxy_host or host.endswith(f".{no_proxy_host}"): + return None + + if scheme in self._proxies: + return self._proxies[scheme] + if self._all_proxy is not None: + return self._all_proxy + + return None + + def get_pool_manager( + self, scheme: str | None, host: str | None + ) -> PoolManager: + """Get a poolmanager for scheme and host. + + Returns a ProxyManager if that is correct based on current proxy env + variables, otherwise returns a PoolManager + """ + + proxy = self._get_proxy(scheme, host) + if proxy not in self._pool_managers: + if proxy is None: + self._pool_managers[proxy] = PoolManager(**self._kw_args) + else: + self._pool_managers[proxy] = ProxyManager( + proxy, + **self._kw_args, + ) + + return self._pool_managers[proxy] + + def request( + self, + method: str, + url: str, + **request_kw: Any, # noqa: ANN401 + ) -> BaseHTTPResponse: + """Make a request using a PoolManager chosen based on url and + proxy environment variables. + """ + u = parse_url(url) + manager = self.get_pool_manager(u.scheme, u.host) + return manager.request(method, url, **request_kw) diff --git a/tuf/ngclient/urllib3_fetcher.py b/tuf/ngclient/urllib3_fetcher.py index 5d4bb1cf60..88d447bd30 100644 --- a/tuf/ngclient/urllib3_fetcher.py +++ b/tuf/ngclient/urllib3_fetcher.py @@ -15,6 +15,7 @@ import tuf from tuf.api import exceptions +from tuf.ngclient._internal.proxy import ProxyEnvironment from tuf.ngclient.fetcher import FetcherInterface if TYPE_CHECKING: @@ -49,7 +50,7 @@ def __init__( if app_user_agent is not None: ua = f"{app_user_agent} {ua}" - self._poolManager = urllib3.PoolManager(headers={"User-Agent": ua}) + self._proxy_env = ProxyEnvironment(headers={"User-Agent": ua}) def _fetch(self, url: str) -> Iterator[bytes]: """Fetch the contents of HTTP/HTTPS url from a remote server. @@ -72,7 +73,7 @@ def _fetch(self, url: str) -> Iterator[bytes]: # - connect timeout (max delay before first byte is received) # - read (gap) timeout (max delay between bytes received) try: - response = self._poolManager.request( + response = self._proxy_env.request( "GET", url, preload_content=False, From 5f9fefb80f3ea57f21eed47c937c37748fb681da Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 Feb 2025 15:39:19 +0200 Subject: [PATCH 2/6] tests: Add tests for ProxyEnvironment This does not actually test using tuf through proxies: it only tests that ProxyEnvironment creates the ProxyManagers that we expect to be created based on the proxy environment variables. Signed-off-by: Jussi Kukkonen --- tests/test_proxy_environment.py | 186 ++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 tests/test_proxy_environment.py diff --git a/tests/test_proxy_environment.py b/tests/test_proxy_environment.py new file mode 100644 index 0000000000..7019a6c2f7 --- /dev/null +++ b/tests/test_proxy_environment.py @@ -0,0 +1,186 @@ +# Copyright 2025, the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Test ngclient ProxyEnvironment""" + +import sys +import unittest +from unittest.mock import Mock, patch + +from urllib3 import PoolManager, ProxyManager + +from tests import utils +from tuf.ngclient._internal.proxy import ProxyEnvironment + + +class TestProxyEnvironment(unittest.TestCase): + """Test ngclient ProxyEnvironment implementation + + These tests use the ProxyEnvironment.get_pool_manager() endpoint and then + look at the ProxyEnvironment._poolmanagers dict keys to decide if the result + is correct. + + The test environment is changed via mocking getproxies(): this is a urllib + method that returns a dict with the proxy environment variable contents. + + Testing ProxyEnvironment.request() would possibly be better but far more + difficult: the current test implementation does not require actually setting up + all of the different proxies. + """ + + def assert_pool_managers( + self, env: ProxyEnvironment, expected: list[str | None] + ) -> None: + # Pool managers have the expected proxy urls + self.assertEqual(list(env._pool_managers.keys()), expected) + + # Pool manager types are as expected + for proxy_url, pool_manager in env._pool_managers.items(): + self.assertIsInstance(pool_manager, PoolManager) + if proxy_url is not None: + self.assertIsInstance(pool_manager, ProxyManager) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_no_variables(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = {} + + env = ProxyEnvironment() + env.get_pool_manager("http", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "subdomain.example.com") + env.get_pool_manager("https", "differentsite.com") + + # There is a single pool manager (no proxies) + self.assert_pool_managers(env, [None]) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_proxy_set(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "https": "http://localhost:8888", + } + + env = ProxyEnvironment() + env.get_pool_manager("http", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "differentsite.com") + + # There are two pool managers: A plain poolmanager and https proxymanager + self.assert_pool_managers(env, [None, "http://localhost:8888"]) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_proxies_set(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "http": "http://localhost:8888", + "https": "http://localhost:9999", + } + + env = ProxyEnvironment() + env.get_pool_manager("http", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "subdomain.example.com") + env.get_pool_manager("https", "differentsite.com") + + # There are two pool managers: A http proxymanager and https proxymanager + self.assert_pool_managers( + env, ["http://localhost:8888", "http://localhost:9999"] + ) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_no_proxy_set(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "http": "http://localhost:8888", + "https": "http://localhost:9999", + "no": "somesite.com, example.com, another.site.com", + } + + env = ProxyEnvironment() + env.get_pool_manager("http", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "example.com") + + # There is a single pool manager (no proxies) + self.assert_pool_managers(env, [None]) + + env.get_pool_manager("http", "differentsite.com") + env.get_pool_manager("https", "differentsite.com") + + # There are three pool managers: plain poolmanager for no_proxy domains, + # http proxymanager and https proxymanager + self.assert_pool_managers( + env, [None, "http://localhost:8888", "http://localhost:9999"] + ) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_no_proxy_subdomain_match(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "https": "http://localhost:9999", + "no": "somesite.com, example.com, another.site.com", + } + + env = ProxyEnvironment() + + # this should match example.com in no_proxy + env.get_pool_manager("https", "subdomain.example.com") + + # There is a single pool manager (no proxies) + self.assert_pool_managers(env, [None]) + + # this should not match example.com in no_proxy + env.get_pool_manager("https", "xexample.com") + + # There are two pool managers: plain poolmanager for no_proxy domains, + # and a https proxymanager + self.assert_pool_managers(env, [None, "http://localhost:9999"]) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_all_proxy_set(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "all": "http://localhost:8888", + } + + env = ProxyEnvironment() + env.get_pool_manager("http", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "subdomain.example.com") + env.get_pool_manager("https", "differentsite.com") + + # There is a single proxy manager + self.assert_pool_managers(env, ["http://localhost:8888"]) + + # This urllib3 currently only handles http and https but let's test anyway + env.get_pool_manager("file", None) + + # proxy manager and a plain pool manager + self.assert_pool_managers(env, ["http://localhost:8888", None]) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_all_proxy_and_no_proxy_set(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "all": "http://localhost:8888", + "no": "somesite.com, example.com, another.site.com", + } + + env = ProxyEnvironment() + env.get_pool_manager("http", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "subdomain.example.com") + + # There is a single pool manager (no proxies) + self.assert_pool_managers(env, [None]) + + env.get_pool_manager("http", "differentsite.com") + env.get_pool_manager("https", "differentsite.com") + + # There are two pool managers: plain poolmanager for no_proxy domains and + # one proxymanager + self.assert_pool_managers(env, [None, "http://localhost:8888"]) + + +if __name__ == "__main__": + utils.configure_test_logging(sys.argv) + unittest.main() From 80b629013e823770f28cbcc99a7f1456c4d00d42 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 Feb 2025 17:59:20 +0200 Subject: [PATCH 3/6] Use __future__ to make old python happy Signed-off-by: Jussi Kukkonen --- tests/test_proxy_environment.py | 2 ++ tuf/ngclient/_internal/proxy.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tests/test_proxy_environment.py b/tests/test_proxy_environment.py index 7019a6c2f7..f7bb0b7baa 100644 --- a/tests/test_proxy_environment.py +++ b/tests/test_proxy_environment.py @@ -3,6 +3,8 @@ """Test ngclient ProxyEnvironment""" +from __future__ import annotations + import sys import unittest from unittest.mock import Mock, patch diff --git a/tuf/ngclient/_internal/proxy.py b/tuf/ngclient/_internal/proxy.py index 699ef54d3e..19bc4b64f2 100644 --- a/tuf/ngclient/_internal/proxy.py +++ b/tuf/ngclient/_internal/proxy.py @@ -3,6 +3,8 @@ """Proxy environment variable handling with Urllib3""" +from __future__ import annotations + from typing import Any from urllib.request import getproxies From 9a4e749def89405c067c8824c022a0dacf9369c4 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 Feb 2025 18:43:24 +0200 Subject: [PATCH 4/6] ngclient: Add docs on HTTP in general Signed-off-by: Jussi Kukkonen --- tuf/ngclient/updater.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tuf/ngclient/updater.py b/tuf/ngclient/updater.py index 8c88a96ead..0dfab2a31d 100644 --- a/tuf/ngclient/updater.py +++ b/tuf/ngclient/updater.py @@ -35,6 +35,19 @@ A simple example of using the Updater to implement a Python TUF client that downloads target files is available in `examples/client `_. + +Notes on how Updater uses HTTP by default: + * urllib3 is the HTTP library + * Typically all requests are retried by urllib3 three times (in cases where + this seems useful) + * Operating system certificate store is used for TLS, in other words + ``certifi`` is not used as the certificate source + * Proxy use can be configured with ``https_proxy`` and other similar + environment variables + +All of the HTTP decisions can be changed with ``fetcher`` argument: +Custom ``FetcherInterface`` implementations are possible. The alternative +``RequestsFetcher`` implementation is also provided (although deprecated). """ from __future__ import annotations From 265e772dba20c8b083a39df552382bba48fbfd6a Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Fri, 14 Feb 2025 21:48:07 +0200 Subject: [PATCH 5/6] ProxyEnvironment: Handle no_proxy="*" Add support for leading dots in no_proxy and "*" as a no_proxy value. Both are supported in requests and based on https://about.gitlab.com/blog/2021/01/27/we-need-to-talk-no-proxy/ both are somewhat common. Signed-off-by: Jussi Kukkonen --- tests/test_proxy_environment.py | 29 +++++++++++++++++++++++++++++ tuf/ngclient/_internal/proxy.py | 11 +++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/tests/test_proxy_environment.py b/tests/test_proxy_environment.py index f7bb0b7baa..ade7b35002 100644 --- a/tests/test_proxy_environment.py +++ b/tests/test_proxy_environment.py @@ -137,6 +137,35 @@ def test_no_proxy_subdomain_match(self, mock_getproxies: Mock) -> None: # and a https proxymanager self.assert_pool_managers(env, [None, "http://localhost:9999"]) + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_no_proxy_wildcard(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "https": "http://localhost:8888", + "no": "*", + } + + env = ProxyEnvironment() + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "differentsite.com") + env.get_pool_manager("https", "subdomain.example.com") + + # There is a single pool manager, no proxies + self.assert_pool_managers(env, [None]) + + @patch("tuf.ngclient._internal.proxy.getproxies") + def test_no_proxy_leading_dot(self, mock_getproxies: Mock) -> None: + mock_getproxies.return_value = { + "https": "http://localhost:8888", + "no": ".example.com", + } + + env = ProxyEnvironment() + env.get_pool_manager("https", "example.com") + env.get_pool_manager("https", "subdomain.example.com") + + # There is a single pool manager, no proxies + self.assert_pool_managers(env, [None]) + @patch("tuf.ngclient._internal.proxy.getproxies") def test_all_proxy_set(self, mock_getproxies: Mock) -> None: mock_getproxies.return_value = { diff --git a/tuf/ngclient/_internal/proxy.py b/tuf/ngclient/_internal/proxy.py index 19bc4b64f2..b42ea2f415 100644 --- a/tuf/ngclient/_internal/proxy.py +++ b/tuf/ngclient/_internal/proxy.py @@ -38,8 +38,9 @@ def __init__( if no_proxy is None: self._no_proxy_hosts = [] else: + # split by comma, remove leading periods self._no_proxy_hosts = [ - h for h in no_proxy.replace(" ", "").split(",") if h + h.lstrip(".") for h in no_proxy.replace(" ", "").split(",") if h ] def _get_proxy(self, scheme: str | None, host: str | None) -> str | None: @@ -50,10 +51,12 @@ def _get_proxy(self, scheme: str | None, host: str | None) -> str | None: # even for schemes that don't require host (like file) return None - # does host match "no_proxy" hosts? + # does host match any of the "no_proxy" hosts? for no_proxy_host in self._no_proxy_hosts: - # exact hostname match or parent domain match - if host == no_proxy_host or host.endswith(f".{no_proxy_host}"): + # wildcard match, exact hostname match, or parent domain match + if no_proxy_host in ("*", host) or host.endswith( + f".{no_proxy_host}" + ): return None if scheme in self._proxies: From 98fcd7160c8ae486b61db87c1223c7dd40f856b3 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Sun, 16 Feb 2025 11:15:12 +0200 Subject: [PATCH 6/6] Changelog: Add missing entries Signed-off-by: Jussi Kukkonen --- docs/CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index a1203eb956..d3ccf45313 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -1,5 +1,21 @@ # Changelog +## Unreleased + +### Changed + +This release is API compatible but contains a major internal change in the HTTP handling. + +* ngclient: urllib3 is used as the HTTP library by default instead of requests (#2762, + #2773, #2789) + * This removes dependencies on `requests`, `idna`, `charset-normalizer` and `certifi` + * The deprecated RequestsFetcher implementation is available but requires selecting + the fetcher at Updater initialization and explicitly depending on requests +* ngclient: TLS certificate source was changed. Certificates now come from operating + system certificate store instead of `certifi` (#2762) +* Test infrastucture has improved and should now be more usable externally, e.g. in + distro test suites (#2749) + ## v5.1.0 ### Changed