from typing import Union
from urllib.parse import quote_plus
from urllib.parse import unquote
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.parse import urlunparse


def resolve_relative_url(package_url, url):
    """
    Return the resolved `url` URL string given a `package_url` base URL string
    of a package.

    An `url` starting with "http://" or "https://" is returned unchanged.

    `package_url` is always treated as a directory, with or without a trailing
    slash: each leading "../" of `url` removes one path segment from it. Any
    query string or fragment of `url` is kept in the result.

    An `url` that does not start with ".." replaces the whole path of
    `package_url`.

    For example:
    >>> resolve_relative_url("https://example.com/package", "../path/file.txt")
    'https://example.com/path/file.txt'
    >>> resolve_relative_url("https://example.com/simple/pkg/", "../../packages/file.whl")
    'https://example.com/packages/file.whl'
    >>> resolve_relative_url("https://example.com/simple/pkg", "../../packages/file.whl")
    'https://example.com/packages/file.whl'
    """
    if url.startswith(("http://", "https://")):
        return url

    base_url_parts = urlparse(package_url)
    url_parts = urlparse(url)

    if url_parts.path.startswith(".."):
        # urljoin() drops the last base segment when the base path does not end
        # with "/" (it is seen as a file). Force a trailing slash so that
        # ".../simple/pkg" and ".../simple/pkg/" resolve identically, which is
        # how the single-level code handled the base before urljoin was used.
        directory_path = base_url_parts.path.rstrip("/") + "/"
        base_directory_url = urlunparse(
            base_url_parts._replace(path=directory_path, params="", query="", fragment="")
        )
        return urljoin(base_directory_url, url)

    # Not a parent-relative URL: its path (with params, query and fragment
    # folded in) takes the place of the base URL path.
    path = urlunparse(
        ("", "", url_parts.path, url_parts.params, url_parts.query, url_parts.fragment)
    )
    return urlunparse(base_url_parts._replace(path=path))
from python_inspector.utils_pypi import PypiSimpleRepository
from python_inspector.utils_pypi import resolve_relative_url
from python_inspector.utils_pypi import valid_python_version

# NOTE: the patch context also shows `test_env = FileDrivenTesting()` at module
# level here; its import lies outside the visible excerpt.


def test_valid_python_version():
    assert valid_python_version("3.8", ">3.1")
    assert not valid_python_version("3.8.1", ">3.9")


def test_resolve_relative_url_multi_level():
    # Two "../" segments must climb two directories from the package page.
    base = "https://example.com/api/pypi/repo/simple/pkg/"
    rel = "../../packages/packages/d9/0b/hash/file-1.0-cp310-linux.whl"
    result = resolve_relative_url(base, rel)
    assert (
        result
        == "https://example.com/api/pypi/repo/packages/packages/d9/0b/hash/file-1.0-cp310-linux.whl"
    )
    assert "/../" not in result


def test_resolve_relative_url_single_level():
    base = "https://example.com/simple/pkg/"
    rel = "../other/file.whl"
    result = resolve_relative_url(base, rel)
    assert result == "https://example.com/simple/other/file.whl"


def test_resolve_relative_url_absolute():
    # Absolute links are returned untouched.
    base = "https://example.com/simple/pkg/"
    rel = "https://files.pythonhosted.org/packages/file.whl"
    result = resolve_relative_url(base, rel)
    assert result == "https://files.pythonhosted.org/packages/file.whl"


def test_resolve_relative_url_base_without_trailing_slash():
    # Same inputs as the doctest of resolve_relative_url: the base URL has no
    # trailing slash.
    result = resolve_relative_url("https://example.com/package", "../path/file.txt")
    assert result == "https://example.com/path/file.txt"


def test_resolve_relative_url_keeps_fragment():
    # A fragment on the relative link (such as a hash) must not be lost.
    base = "https://example.com/simple/pkg/"
    rel = "../../packages/file.whl#sha256=abc123"
    result = resolve_relative_url(base, rel)
    assert result == "https://example.com/packages/file.whl#sha256=abc123"