feat(http): implement URL normalization and validation for LibreTranslate service

This commit is contained in:
Ivan
2026-05-02 04:35:47 -05:00
parent 5784b70ee2
commit 6e2d5ba19a
+66 -2
View File
@@ -4,7 +4,9 @@
from __future__ import annotations
from urllib.parse import urlparse, urlunparse
import ipaddress
import re
from urllib.parse import unquote, urlparse, urlunparse
class UnsafeOutboundUrlError(ValueError):
@@ -41,7 +43,69 @@ def normalize_loopback_http_service_base(url: str) -> str:
msg = "URL host must be 127.0.0.1, localhost, or ::1"
raise UnsafeOutboundUrlError(msg)
# Rebuild origin only (LibreTranslate mounts at /languages, /translate, etc.)
authority = netloc
origin = urlunparse((parsed.scheme, authority, "", "", "", ""))
return origin.rstrip("/")
_WS_CTRL = re.compile(r"[\x00-\x20\x7f]")
def normalize_libretranslate_http_service_base(url: str) -> str:
"""Return scheme://host:port with no path, query, or fragment.
Accepts any HTTP(S) hostname or IP reachable from this process (remote LibreTranslate or
public API). Embedded credentials are rejected; non-http(s) schemes are rejected.
Literal IPv4 link-local targets (``169.254.0.0/16``) are rejected as a common SSRF/metadata
path. Other private or loopback addresses are allowed so local servers and overlays (e.g. VPN
mesh) continue to work.
"""
if not url or not isinstance(url, str):
msg = "URL must be a non-empty string"
raise UnsafeOutboundUrlError(msg)
trimmed = url.strip()
if _WS_CTRL.search(trimmed):
msg = "URL must not contain whitespace or control characters"
raise UnsafeOutboundUrlError(msg)
parsed = urlparse(trimmed)
if parsed.scheme not in ("http", "https"):
msg = "URL must use http or https"
raise UnsafeOutboundUrlError(msg)
netloc = parsed.netloc or ""
if "@" in netloc:
msg = "URL must not contain credentials"
raise UnsafeOutboundUrlError(msg)
host = parsed.hostname
if host is None:
msg = "URL must include a hostname"
raise UnsafeOutboundUrlError(msg)
host_decoded = unquote(host, errors="strict")
if _WS_CTRL.search(host_decoded):
msg = "URL must not contain whitespace or control characters"
raise UnsafeOutboundUrlError(msg)
host_for_ip_check = host_decoded.lower().strip("[]")
try:
addr = ipaddress.ip_address(host_for_ip_check)
except ValueError:
pass
else:
if addr.version == 4 and addr.is_link_local:
msg = "URL must not target an IPv4 link-local address"
raise UnsafeOutboundUrlError(msg)
if addr.is_multicast or addr.is_unspecified:
msg = "URL must not target a multicast or unspecified address"
raise UnsafeOutboundUrlError(msg)
if addr.is_reserved:
msg = "URL must not target a reserved address"
raise UnsafeOutboundUrlError(msg)
authority = netloc
origin = urlunparse((parsed.scheme, authority, "", "", "", ""))
return origin.rstrip("/")