core, ui: safe mode to sanitize URIs when sending (#6196)

* core: safe mode to sanitize URIs when sending

* ui: use safe sanitize when sending
This commit is contained in:
Evgeny
2025-08-18 12:58:10 +01:00
committed by GitHub
parent 28a0959d96
commit dc3dcd4fc8
13 changed files with 80 additions and 48 deletions

View File

@@ -97,7 +97,7 @@ func showInvalidLinkAlert(_ uri: String, error: String? = nil) {
}
func sanitizeUri(_ s: String) -> (url: (uri: URL, sanitizedUri: URL?)?, error: String?) {
let parsed = parseSanitizeUri(s)
let parsed = parseSanitizeUri(s, safe: false)
return if let uri = URL(string: s), let uriInfo = parsed?.uriInfo {
(url: (uri: uri, sanitizedUri: uriInfo.sanitized.flatMap { URL(string: $0) }), error: nil)
} else {

View File

@@ -1547,13 +1547,13 @@ func sanitizeMessage(_ parsedMsg: [FormattedText]) -> (message: String, parsedMs
var updated = ft
switch ft.format {
case .uri:
if let sanitized = parseSanitizeUri(ft.text)?.uriInfo?.sanitized {
if let sanitized = parseSanitizeUri(ft.text, safe: true)?.uriInfo?.sanitized {
updated = FormattedText(text: sanitized, format: .uri)
pos += updated.text.count
sanitizedPos = pos
}
case let .hyperLink(text, uri):
if let sanitized = parseSanitizeUri(uri)?.uriInfo?.sanitized {
if let sanitized = parseSanitizeUri(uri, safe: true)?.uriInfo?.sanitized {
let updatedText = if let text { "[\(text)](\(sanitized))" } else { sanitized }
updated = FormattedText(text: updatedText, format: .hyperLink(showText: text, linkUri: sanitized))
pos += updated.text.count

View File

@@ -186,9 +186,9 @@ struct ParsedServerAddress: Decodable {
var parseError: String
}
public func parseSanitizeUri(_ s: String) -> ParsedUri? {
public func parseSanitizeUri(_ s: String, safe: Bool) -> ParsedUri? {
var c = s.cString(using: .utf8)!
if let cjson = chat_parse_uri(&c) {
if let cjson = chat_parse_uri(&c, safe ? 1 : 0) {
if let d = dataFromCString(cjson) {
do {
return try jsonDecoder.decode(ParsedUri.self, from: d)

View File

@@ -24,7 +24,7 @@ extern char *chat_send_cmd_retry(chat_ctrl ctl, char *cmd, int retryNum);
extern char *chat_recv_msg_wait(chat_ctrl ctl, int wait);
extern char *chat_parse_markdown(char *str);
extern char *chat_parse_server(char *str);
extern char *chat_parse_uri(char *str);
extern char *chat_parse_uri(char *str, int safe);
extern char *chat_password_hash(char *pwd, char *salt);
extern char *chat_valid_name(char *name);
extern int chat_json_length(char *str);

View File

@@ -64,7 +64,7 @@ extern char *chat_recv_msg(chat_ctrl ctrl); // deprecated
extern char *chat_recv_msg_wait(chat_ctrl ctrl, const int wait);
extern char *chat_parse_markdown(const char *str);
extern char *chat_parse_server(const char *str);
extern char *chat_parse_uri(const char *str);
extern char *chat_parse_uri(const char *str, const int safe);
extern char *chat_password_hash(const char *pwd, const char *salt);
extern char *chat_valid_name(const char *name);
extern int chat_json_length(const char *str);
@@ -148,9 +148,9 @@ Java_chat_simplex_common_platform_CoreKt_chatParseServer(JNIEnv *env, __unused j
}
JNIEXPORT jstring JNICALL
Java_chat_simplex_common_platform_CoreKt_chatParseUri(JNIEnv *env, __unused jclass clazz, jstring str) {
Java_chat_simplex_common_platform_CoreKt_chatParseUri(JNIEnv *env, __unused jclass clazz, jstring str, jint safe) {
const char *_str = (*env)->GetStringUTFChars(env, str, JNI_FALSE);
jstring res = (*env)->NewStringUTF(env, chat_parse_uri(_str));
jstring res = (*env)->NewStringUTF(env, chat_parse_uri(_str, safe));
(*env)->ReleaseStringUTFChars(env, str, _str);
return res;
}

View File

@@ -37,7 +37,7 @@ extern char *chat_recv_msg(chat_ctrl ctrl); // deprecated
extern char *chat_recv_msg_wait(chat_ctrl ctrl, const int wait);
extern char *chat_parse_markdown(const char *str);
extern char *chat_parse_server(const char *str);
extern char *chat_parse_uri(const char *str);
extern char *chat_parse_uri(const char *str, const int safe);
extern char *chat_password_hash(const char *pwd, const char *salt);
extern char *chat_valid_name(const char *name);
extern int chat_json_length(const char *str);
@@ -158,9 +158,9 @@ Java_chat_simplex_common_platform_CoreKt_chatParseServer(JNIEnv *env, jclass cla
}
JNIEXPORT jstring JNICALL
Java_chat_simplex_common_platform_CoreKt_chatParseUri(JNIEnv *env, jclass clazz, jstring str) {
Java_chat_simplex_common_platform_CoreKt_chatParseUri(JNIEnv *env, jclass clazz, jstring str, jint safe) {
const char *_str = encode_to_utf8_chars(env, str);
jstring res = decode_to_utf8_string(env, chat_parse_uri(_str));
jstring res = decode_to_utf8_string(env, chat_parse_uri(_str, safe));
(*env)->ReleaseStringUTFChars(env, str, _str);
return res;
}

View File

@@ -4634,8 +4634,8 @@ data class ParsedServerAddress (
var parseError: String
)
fun parseSanitizeUri(s: String): ParsedUri? {
val parsed = chatParseUri(s)
fun parseSanitizeUri(s: String, safe: Boolean): ParsedUri? {
val parsed = chatParseUri(s, if (safe) 1 else 0)
return runCatching { json.decodeFromString(ParsedUri.serializer(), parsed) }
.onFailure { Log.d(TAG, "parseSanitizeUri decode error: $it") }
.getOrNull()

View File

@@ -28,7 +28,7 @@ external fun chatRecvMsg(ctrl: ChatCtrl): String
external fun chatRecvMsgWait(ctrl: ChatCtrl, timeout: Int): String
external fun chatParseMarkdown(str: String): String
external fun chatParseServer(str: String): String
external fun chatParseUri(str: String): String
external fun chatParseUri(str: String, safe: Int): String
external fun chatPasswordHash(pwd: String, salt: String): String
external fun chatValidName(name: String): String
external fun chatJsonLength(str: String): Int

View File

@@ -876,7 +876,7 @@ fun ComposeView(
var updated = ft
when(ft.format) {
is Format.Uri -> {
val sanitized = parseSanitizeUri(ft.text)?.uriInfo?.sanitized
val sanitized = parseSanitizeUri(ft.text, safe = true)?.uriInfo?.sanitized
if (sanitized != null) {
updated = FormattedText(text = sanitized, format = Format.Uri())
pos += updated.text.count()
@@ -884,7 +884,7 @@ fun ComposeView(
}
}
is Format.HyperLink -> {
val sanitized = parseSanitizeUri(ft.format.linkUri)?.uriInfo?.sanitized
val sanitized = parseSanitizeUri(ft.format.linkUri, safe = true)?.uriInfo?.sanitized
if (sanitized != null) {
val updatedText = if (ft.format.showText == null) sanitized else "[${ft.format.showText}]($sanitized)"
updated = FormattedText(text = updatedText, format = Format.HyperLink(showText = ft.format.showText, linkUri = sanitized))

View File

@@ -427,7 +427,7 @@ fun showInvalidLinkAlert(uri: String, error: String? = null) {
}
fun sanitizeUri(s: String): Pair<Pair<Boolean, String?>?, String?> {
val parsed = parseSanitizeUri(s)
val parsed = parseSanitizeUri(s, safe = false)
return if (parsed?.uriInfo != null) {
(true to parsed.uriInfo.sanitized) to null
} else {

View File

@@ -346,18 +346,20 @@ parseUri s = case U.parseURI U.laxURIParserOptions s of
-- 2) also allow whitelisted parameters,
-- 3) remove all other parameters.
-- *page name: lowercase latin in snake-case or hyphen-case, allowing for sinlge leading or trailing hyphen or underscore.
sanitizeUri :: U.URI -> Maybe U.URI
sanitizeUri uri@U.URI {uriAuthority, uriPath, uriQuery = U.Query originalQS} =
sanitizeUri :: Bool -> U.URI -> Maybe U.URI
sanitizeUri safe uri@U.URI {uriAuthority, uriPath, uriQuery = U.Query originalQS} =
let sanitizedQS
| safe = filter (not . isSafeBlacklisted . fst) originalQS
| isNamePath = case originalQS of
p@(n, _) : ps -> (if isBlacklisted n && not (isWhitelisted n) then id else (p :)) $ filter (isWhitelisted . fst) ps
p@(n, _) : ps -> (if isWhitelisted n || not (isBlacklisted n) then (p :) else id) $ filter (isWhitelisted . fst) ps
[] -> []
| otherwise = filter (isWhitelisted . fst) originalQS
in if length sanitizedQS == length originalQS
then Nothing
else Just $ uri {U.uriQuery = U.Query sanitizedQS}
where
isBlacklisted p = any ($ p) qsBlacklist
isSafeBlacklisted p = any (`B.isPrefixOf` p) qsSafeBlacklist
isBlacklisted p = isSafeBlacklisted p || any ($ p) qsBlacklist
isWhitelisted p = any (\(f, ps) -> f host && p `elem` ps) qsWhitelist
host = maybe "" (\U.Authority {authorityHost = U.Host h} -> h) uriAuthority
isNamePath = B.all (\c -> (c >= 'a' && c <= 'z') || c == '_' || c == '-' || c == '/') uriPath
@@ -368,7 +370,8 @@ sanitizeUri uri@U.URI {uriAuthority, uriPath, uriQuery = U.Query originalQS} =
(dom "amazon.com", ["i", "rh", "k"]), -- department, filter, keyword
(dom "baidu.com", ["wd"]), -- search string
(dom "bing.com", ["mkt"]), -- localized results
(dom "github.com", ["author", "diff", "w"]), -- author in search result, PR parameters
(dom "github.com", ["author", "diff", "ref", "w"]), -- author in search result, PR parameters
(dom "play.google.com", ["id"]),
(dom "reddit.com", ["t"]), -- search type, time range
(dom "wikipedia.com", ["oldid", "uselang"]), -- to show old page revision and chosen user language
(dom "x.com", ["f"]), -- feed type
@@ -380,23 +383,43 @@ sanitizeUri uri@U.URI {uriAuthority, uriPath, uriQuery = U.Query originalQS} =
qsBlacklist :: [ByteString -> Bool]
qsBlacklist =
[ (B.any (== '_')),
("ad" `B.isPrefixOf`),
("af" `B.isPrefixOf`),
("dc" `B.isPrefixOf`),
("fb" `B.isPrefixOf`),
("gc" `B.isPrefixOf`),
("li" `B.isPrefixOf`),
("ref" `B.isPrefixOf`),
("si" `B.isPrefixOf`),
("tw" `B.isPrefixOf`),
("utm" `B.isPrefixOf`),
("camp" `B.isInfixOf`),
("cmp" `B.isInfixOf`),
("dev" `B.isInfixOf`),
("id" `B.isInfixOf`),
("prom" `B.isInfixOf`),
("source" `B.isInfixOf`),
("src" `B.isInfixOf`)
("id" `B.isSuffixOf`),
("source" `B.isPrefixOf`)
]
qsSafeBlacklist :: [ByteString]
qsSafeBlacklist =
[ "ad",
"af",
"camp",
"cmp",
"dc",
"dev",
"ef_",
"fb",
"gad_",
"gc",
"gdf",
"hsa_",
"igsh",
"li",
"matomo_",
"mc_",
"mkwid",
"msc",
"mtm_",
"pcrid",
"piwik_",
"pk_",
"prom",
"ref",
"s_kw",
"si",
"src",
"srs",
"trk_",
"tw",
"utm",
"ycl"
]
markdownText :: FormattedText -> Text

View File

@@ -128,7 +128,7 @@ foreign export ccall "chat_parse_markdown" cChatParseMarkdown :: CString -> IO C
foreign export ccall "chat_parse_server" cChatParseServer :: CString -> IO CJSONString
foreign export ccall "chat_parse_uri" cChatParseUri :: CString -> IO CJSONString
foreign export ccall "chat_parse_uri" cChatParseUri :: CString -> CInt -> IO CJSONString
foreign export ccall "chat_password_hash" cChatPasswordHash :: CString -> CString -> IO CString
@@ -220,8 +220,8 @@ cChatParseServer :: CString -> IO CJSONString
cChatParseServer s = newCStringFromLazyBS . chatParseServer =<< B.packCString s
-- | parse web URI - returns ParsedUri JSON
cChatParseUri :: CString -> IO CJSONString
cChatParseUri s = newCStringFromLazyBS . chatParseUri =<< B.packCString s
cChatParseUri :: CString -> CInt -> IO CJSONString
cChatParseUri s safe = newCStringFromLazyBS . chatParseUri (safe /= 0) =<< B.packCString s
cChatPasswordHash :: CString -> CString -> IO CString
cChatPasswordHash cPwd cSalt = do
@@ -366,11 +366,11 @@ chatParseServer = J.encode . toServerAddress . strDecode
enc :: StrEncoding a => a -> String
enc = B.unpack . strEncode
chatParseUri :: ByteString -> JSONByteString
chatParseUri s = J.encode $ case parseUri s of
chatParseUri :: Bool -> ByteString -> JSONByteString
chatParseUri safe s = J.encode $ case parseUri s of
Left e -> ParsedUri Nothing e
Right uri@U.URI {uriScheme = U.Scheme sch} ->
let sanitized = safeDecodeUtf8 . U.serializeURIRef' <$> sanitizeUri uri
let sanitized = safeDecodeUtf8 . U.serializeURIRef' <$> sanitizeUri safe uri
uriInfo = UriInfo {scheme = safeDecodeUtf8 sch, sanitized}
in ParsedUri (Just uriInfo) ""

View File

@@ -378,9 +378,18 @@ testSanitizeUri = describe "sanitizeUri" $ do
"https://www.youtube.com/watch?v=abc&t=123" `sanitized` Nothing
"https://www.youtube.com/watch?ref=456&v=abc&t=123" `sanitized` Just "https://www.youtube.com/watch?v=abc&t=123"
it "should only allow whitelisted parameters if path contains IDs" $ do
"https://example.com/page/a123?name=abc" `sanitized` Just "https://example.com/page/a123"
"https://youtu.be/a123?si=456" `sanitized` Just "https://youtu.be/a123"
"https://youtu.be/a123?t=456" `sanitized` Nothing
"https://youtu.be/a123?si=456&t=789" `sanitized` Just "https://youtu.be/a123?t=789"
it "should allow some parameters in safe mode, but sanitize in unsafe" $ do
"https://example.com/page/a123?source=abc" `eagerSanitized` Just "https://example.com/page/a123"
"https://example.com/page/a123?source=abc" `safeSanitized` Nothing -- source is in unsafe blacklist
"https://example.com/page/a123?name=abc" `eagerSanitized` Just "https://example.com/page/a123"
"https://example.com/page/a123?name=abc" `safeSanitized` Nothing -- name is not in a whitelist
where
s `sanitized` res = (U.serializeURIRef' <$$> (sanitizeUri <$> parseUri s)) `shouldBe` Right res
s `eagerSanitized` res = sanitized_ False s res
s `safeSanitized` res = sanitized_ True s res
s `sanitized` res = do
s `eagerSanitized` res
s `safeSanitized` res
sanitized_ safe s res = (U.serializeURIRef' <$$> (sanitizeUri safe <$> parseUri s)) `shouldBe` Right res