Skip to content

Add config setting to avoid adding trailing slash to URLs #1719

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ authors = [
{ name = 'David Montague', email = '[email protected]' },
{ name = 'David Hewitt', email = '[email protected]' },
{ name = 'Sydney Runkle', email = '[email protected]' },
{ name = 'Victorien Plot', email='[email protected]' },
{ name = 'Victorien Plot', email = '[email protected]' },
]
classifiers = [
'Development Status :: 3 - Alpha',
Expand Down Expand Up @@ -149,6 +149,9 @@ require_change_file = false
[tool.pyright]
include = ['python/pydantic_core', 'tests/test_typing.py']
reportUnnecessaryTypeIgnoreComment = true
executionEnvironments = [
{ root = "tests", reportPrivateImportUsage = false, reportMissingParameterType = false, reportAny = false },
]

[tool.inline-snapshot.shortcuts]
fix = ["create", "fix"]
Expand Down
22 changes: 18 additions & 4 deletions python/pydantic_core/_pydantic_core.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -522,8 +522,15 @@ class Url(SupportsAllComparisons):
by Mozilla.
"""

def __init__(self, url: str) -> None: ...
def __new__(cls, url: str) -> Self: ...
def __init__(self, url: str, *, add_trailing_slash: bool = True) -> None:
"""Initialize a new URL object.
Args:
url: The URL string to parse.
add_trailing_slash: Whether to add an extra trailing slash to some URLs, defaults to `True` for
backward compatibility, default will change to `False` in v3 version.
"""
def __new__(cls, url: str, *, add_trailing_slash: bool = True) -> Self: ...
@property
def scheme(self) -> str: ...
@property
Expand Down Expand Up @@ -568,8 +575,15 @@ class MultiHostUrl(SupportsAllComparisons):
by Mozilla.
"""

def __init__(self, url: str) -> None: ...
def __new__(cls, url: str) -> Self: ...
def __init__(self, url: str, *, add_trailing_slash: bool = True) -> None:
"""Initialize a new MultiHostUrl object.
Args:
url: The URL string to parse.
add_trailing_slash: Whether to add an extra trailing slash to some URLs, defaults to `True` for
backward compatibility, default will change to `False` in v3 version.
"""
def __new__(cls, url: str, *, add_trailing_slash: bool = True) -> Self: ...
@property
def scheme(self) -> str: ...
@property
Expand Down
13 changes: 13 additions & 0 deletions python/pydantic_core/core_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class CoreConfig(TypedDict, total=False):
validate_by_alias: Whether to use the field's alias when validating against the provided input data. Default is `True`.
validate_by_name: Whether to use the field's name when validating against the provided input data. Default is `False`. Replacement for `populate_by_name`.
serialize_by_alias: Whether to serialize by alias. Default is `False`, expected to change to `True` in V3.
url_add_trailing_slash: Whether to add an extra trailing slash to some URLs, defaults to `True` for
backward compatibility, default will change to `False` in v3 version.
"""

title: str
Expand Down Expand Up @@ -114,6 +116,7 @@ class CoreConfig(TypedDict, total=False):
validate_by_alias: bool # default: True
validate_by_name: bool # default: False
serialize_by_alias: bool # default: False
url_add_trailing_slash: bool # default: True


IncExCall: TypeAlias = 'set[int | str] | dict[int | str, IncExCall] | None'
Expand Down Expand Up @@ -3824,6 +3827,7 @@ class UrlSchema(TypedDict, total=False):
default_host: str
default_port: int
default_path: str
add_trailing_slash: bool
strict: bool
ref: str
metadata: dict[str, Any]
Expand All @@ -3838,6 +3842,7 @@ def url_schema(
default_host: str | None = None,
default_port: int | None = None,
default_path: str | None = None,
add_trailing_slash: bool | None = None,
strict: bool | None = None,
ref: str | None = None,
metadata: dict[str, Any] | None = None,
Expand All @@ -3862,6 +3867,8 @@ def url_schema(
default_host: The default host to use if the URL does not have a host
default_port: The default port to use if the URL does not have a port
default_path: The default path to use if the URL does not have a path
add_trailing_slash: Whether to add an extra trailing slash to some URLs, defaults to `True` for
backward compatibility, default will change to `False` in v3 version.
strict: Whether to use strict URL parsing
ref: optional unique identifier of the schema, used to reference the schema in other places
metadata: Any other information you want to include with the schema, not used by pydantic-core
Expand All @@ -3879,6 +3886,7 @@ def url_schema(
ref=ref,
metadata=metadata,
serialization=serialization,
add_trailing_slash=add_trailing_slash,
)


Expand All @@ -3890,6 +3898,7 @@ class MultiHostUrlSchema(TypedDict, total=False):
default_host: str
default_port: int
default_path: str
add_trailing_slash: bool
strict: bool
ref: str
metadata: dict[str, Any]
Expand All @@ -3904,6 +3913,7 @@ def multi_host_url_schema(
default_host: str | None = None,
default_port: int | None = None,
default_path: str | None = None,
add_trailing_slash: bool | None = None,
strict: bool | None = None,
ref: str | None = None,
metadata: dict[str, Any] | None = None,
Expand All @@ -3928,6 +3938,8 @@ def multi_host_url_schema(
default_host: The default host to use if the URL does not have a host
default_port: The default port to use if the URL does not have a port
default_path: The default path to use if the URL does not have a path
add_trailing_slash: Whether to add an extra trailing slash to some URLs, defaults to `True` for
backward compatibility, default will change to `False` in v3 version.
strict: Whether to use strict URL parsing
ref: optional unique identifier of the schema, used to reference the schema in other places
metadata: Any other information you want to include with the schema, not used by pydantic-core
Expand All @@ -3941,6 +3953,7 @@ def multi_host_url_schema(
default_host=default_host,
default_port=default_port,
default_path=default_path,
add_trailing_slash=add_trailing_slash,
strict=strict,
ref=ref,
metadata=metadata,
Expand Down
101 changes: 67 additions & 34 deletions src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,38 +14,44 @@ use url::Url;
use crate::tools::SchemaDict;
use crate::SchemaValidator;

static SCHEMA_DEFINITION_URL: GILOnceCell<SchemaValidator> = GILOnceCell::new();

// Rust-side state behind the Python-visible `Url` class (the name is set by `#[pyclass(name = "Url")]`).
// Plain `//` comments are used here on purpose so that no text leaks into the Python class docstring.
#[pyclass(name = "Url", module = "pydantic_core._pydantic_core", subclass, frozen)]
#[derive(Clone, Hash)]
#[cfg_attr(debug_assertions, derive(Debug))]
pub struct PyUrl {
// The parsed URL value from the `url` crate.
lib_url: Url,
// When true, `__str__` and `unicode_string` drop one trailing '/' from the rendered URL.
// NOTE(review): the derived `Hash` also hashes this flag — confirm that is the intended behaviour.
remove_trailing_slash: bool,
}

impl PyUrl {
pub fn new(lib_url: Url) -> Self {
Self { lib_url }
pub fn new(lib_url: Url, remove_trailing_slash: bool) -> Self {
Self {
lib_url,
remove_trailing_slash,
}
}

pub fn url(&self) -> &Url {
&self.lib_url
}

pub fn mut_url(&mut self) -> &mut Url {
&mut self.lib_url
}
}

fn build_schema_validator(py: Python, schema_type: &str) -> SchemaValidator {
let schema = PyDict::new(py);
schema.set_item("type", schema_type).unwrap();
SchemaValidator::py_new(py, &schema, None).unwrap()
impl From<PyUrl> for Url {
fn from(value: PyUrl) -> Url {
value.lib_url
}
}

#[pymethods]
impl PyUrl {
#[new]
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
let schema_obj = SCHEMA_DEFINITION_URL
.get_or_init(py, || build_schema_validator(py, "url"))
.validate_python(py, url, None, None, None, None, false.into(), None, None)?;
#[pyo3(signature = (url, *, add_trailing_slash=true))]
pub fn py_new(py: Python, url: &Bound<'_, PyAny>, add_trailing_slash: bool) -> PyResult<Self> {
let schema_validator = get_schema_validator(py, false, add_trailing_slash)?;
let schema_obj = schema_validator.validate_python(py, url, None, None, None, None, false.into(), None, None)?;
schema_obj.extract(py)
}

Expand Down Expand Up @@ -114,11 +120,15 @@ impl PyUrl {

// string representation of the URL, with punycode decoded when appropriate
pub fn unicode_string(&self) -> String {
unicode_url(&self.lib_url)
unicode_url(&self.lib_url, self.remove_trailing_slash)
}

pub fn __str__(&self) -> &str {
self.lib_url.as_str()
let mut s = self.lib_url.as_str();
if self.remove_trailing_slash && s.ends_with('/') {
s = &s[..s.len() - 1];
}
s
}

pub fn __repr__(&self) -> String {
Expand Down Expand Up @@ -201,11 +211,8 @@ pub struct PyMultiHostUrl {
}

impl PyMultiHostUrl {
pub fn new(ref_url: Url, extra_urls: Option<Vec<Url>>) -> Self {
Self {
ref_url: PyUrl::new(ref_url),
extra_urls,
}
pub fn new(ref_url: PyUrl, extra_urls: Option<Vec<Url>>) -> Self {
Self { ref_url, extra_urls }
}

pub fn lib_url(&self) -> &Url {
Expand All @@ -217,15 +224,13 @@ impl PyMultiHostUrl {
}
}

static SCHEMA_DEFINITION_MULTI_HOST_URL: GILOnceCell<SchemaValidator> = GILOnceCell::new();

#[pymethods]
impl PyMultiHostUrl {
#[new]
pub fn py_new(py: Python, url: &Bound<'_, PyAny>) -> PyResult<Self> {
let schema_obj = SCHEMA_DEFINITION_MULTI_HOST_URL
.get_or_init(py, || build_schema_validator(py, "multi-host-url"))
.validate_python(py, url, None, None, None, None, false.into(), None, None)?;
#[pyo3(signature = (url, *, add_trailing_slash=true))]
pub fn py_new(py: Python, url: &Bound<'_, PyAny>, add_trailing_slash: bool) -> PyResult<Self> {
let schema_validator = get_schema_validator(py, true, add_trailing_slash)?;
let schema_obj = schema_validator.validate_python(py, url, None, None, None, None, false.into(), None, None)?;
schema_obj.extract(py)
}

Expand Down Expand Up @@ -279,13 +284,12 @@ impl PyMultiHostUrl {

// special urls will have had a trailing slash added, non-special urls will not
// hence we need to remove the last char if the schema is special
#[allow(clippy::bool_to_int_with_if)]
let sub = if schema_is_special(schema) { 1 } else { 0 };
let sub: usize = schema_is_special(schema).into();

let hosts = extra_urls
.iter()
.map(|url| {
let str = unicode_url(url);
let str = unicode_url(url, false);
str[host_offset..str.len() - sub].to_string()
})
.collect::<Vec<String>>()
Expand All @@ -302,21 +306,20 @@ impl PyMultiHostUrl {
let schema = self.ref_url.lib_url.scheme();
let host_offset = schema.len() + 3;

let mut full_url = self.ref_url.lib_url.to_string();
let mut full_url = self.ref_url.__str__().to_string();
full_url.insert(host_offset, ',');

// special urls will have had a trailing slash added, non-special urls will not
// hence we need to remove the last char if the schema is special
#[allow(clippy::bool_to_int_with_if)]
let sub = if schema_is_special(schema) { 1 } else { 0 };
let sub: usize = schema_is_special(schema).into();

let hosts = extra_urls
.iter()
.map(|url| {
let str = url.as_str();
&str[host_offset..str.len() - sub]
})
.collect::<Vec<&str>>()
.collect::<Vec<_>>()
.join(",");
full_url.insert_str(host_offset, &hosts);
full_url
Expand Down Expand Up @@ -477,10 +480,10 @@ fn host_to_dict<'a>(py: Python<'a>, lib_url: &Url) -> PyResult<Bound<'a, PyDict>
Ok(dict)
}

fn unicode_url(lib_url: &Url) -> String {
fn unicode_url(lib_url: &Url, remove_trailing_slash: bool) -> String {
let mut s = lib_url.to_string();

match lib_url.host() {
s = match lib_url.host() {
Some(url::Host::Domain(domain)) if is_punnycode_domain(lib_url, domain) => {
if let Some(decoded) = decode_punycode(domain) {
// replace the range containing the punycode domain with the decoded domain
Expand All @@ -490,7 +493,11 @@ fn unicode_url(lib_url: &Url) -> String {
s
}
_ => s,
};
if remove_trailing_slash && s.ends_with('/') {
s.pop();
}
s
}

fn decode_punycode(domain: &str) -> Option<String> {
Expand All @@ -517,3 +524,29 @@ fn is_punnycode_domain(lib_url: &Url, domain: &str) -> bool {
/// Returns `true` when `schema` is one of the "special" URL schemes
/// (`http`, `https`, `ws`, `wss`, `ftp`, `file`).
///
/// The comparison is exact and case-sensitive: callers are expected to pass
/// the scheme exactly as they want it matched.
pub fn schema_is_special(schema: &str) -> bool {
    ["http", "https", "ws", "wss", "ftp", "file"].contains(&schema)
}

// Validator caches used by `get_schema_validator`, one cell per combination of
// URL flavour (SINGLE = "url", MULTI = "multi-host-url") and `add_trailing_slash`
// value (TRUE / FALSE). Each cell is filled on first use via `get_or_try_init`.
static SCHEMA_URL_SINGLE_TRUE: GILOnceCell<SchemaValidator> = GILOnceCell::new();
static SCHEMA_URL_SINGLE_FALSE: GILOnceCell<SchemaValidator> = GILOnceCell::new();
static SCHEMA_URL_MULTI_TRUE: GILOnceCell<SchemaValidator> = GILOnceCell::new();
static SCHEMA_URL_MULTI_FALSE: GILOnceCell<SchemaValidator> = GILOnceCell::new();

// Builds a `SchemaValidator` from a minimal schema dict.
//
// Arguments:
//   $py                 - identifier bound to the `Python` token.
//   $schema_type        - string literal stored under the "type" key.
//   $add_trailing_slash - bool literal; only a `false` value is written into the dict.
//
// The expansion uses `?` on `set_item`, so it must be expanded inside a function or
// closure whose return type can absorb that error. The expression evaluates to
// whatever `SchemaValidator::py_new` returns.
macro_rules! make_schema_val {
($py:ident, $schema_type:literal, $add_trailing_slash:literal) => {{
let schema = PyDict::new($py);
schema.set_item(intern!($py, "type"), intern!($py, $schema_type))?;
// add_trailing_slash defaults to true, so only set it if false
if !$add_trailing_slash {
schema.set_item(intern!($py, "add_trailing_slash"), false)?;
}
SchemaValidator::py_new($py, &schema, None)
}};
}

/// Returns the cached validator for the requested URL flavour, building it on first use.
///
/// * `multi_host` - `true` selects the `"multi-host-url"` schema, `false` the `"url"` schema.
/// * `add_trailing_slash` - selects the validator built with (`true`) or without (`false`)
///   the trailing-slash behaviour.
///
/// Each of the four combinations has its own `GILOnceCell`; any error raised while
/// building a validator is returned to the caller.
fn get_schema_validator(py: Python<'_>, multi_host: bool, add_trailing_slash: bool) -> PyResult<&SchemaValidator> {
    if multi_host {
        if add_trailing_slash {
            SCHEMA_URL_MULTI_TRUE.get_or_try_init(py, || make_schema_val!(py, "multi-host-url", true))
        } else {
            SCHEMA_URL_MULTI_FALSE.get_or_try_init(py, || make_schema_val!(py, "multi-host-url", false))
        }
    } else if add_trailing_slash {
        SCHEMA_URL_SINGLE_TRUE.get_or_try_init(py, || make_schema_val!(py, "url", true))
    } else {
        SCHEMA_URL_SINGLE_FALSE.get_or_try_init(py, || make_schema_val!(py, "url", false))
    }
}
Loading
Loading