core, ui: markdown for hyperlinks, warn on unsanitized links, option to sanitize sent links (#6160)

* core: markdown for "hidden" links

* update, test

* api docs

* chatParseUri FFI function

* ios: hyperlinks, offer to open sanitized links, an option to send sanitized links (enabled by default)

* update markdown

* android, desktop: ditto

* ios: export localizations

* core: rename constructor, change Maybe semantics for web links

* rename
This commit is contained in:
Evgeny
2025-08-09 10:52:35 +01:00
committed by GitHub
parent b4293e361b
commit ef60ceea12
55 changed files with 1004 additions and 288 deletions

View File

@@ -1,6 +1,8 @@
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
@@ -12,11 +14,13 @@
module Simplex.Chat.Markdown where
import Control.Applicative (optional, (<|>))
import Control.Monad
import Data.Aeson (FromJSON, ToJSON)
import qualified Data.Aeson as J
import qualified Data.Aeson.TH as JQ
import Data.Attoparsec.Text (Parser)
import qualified Data.Attoparsec.Text as A
import qualified Data.ByteString.Char8 as B
import Data.Char (isAlpha, isAscii, isDigit, isPunctuation, isSpace)
import Data.Either (fromRight)
import Data.Functor (($>))
@@ -34,9 +38,10 @@ import Simplex.Messaging.Agent.Protocol (AConnectionLink (..), ConnReqUriData (.
import Simplex.Messaging.Encoding.String
import Simplex.Messaging.Parsers (defaultJSON, dropPrefix, enumJSON, fstToLower, sumTypeJSON)
import Simplex.Messaging.Protocol (ProtocolServer (..))
import Simplex.Messaging.Util (decodeJSON, safeDecodeUtf8)
import Simplex.Messaging.Util (decodeJSON, safeDecodeUtf8, tshow)
import System.Console.ANSI.Types
import qualified Text.Email.Validate as Email
import qualified URI.ByteString as U
data Markdown = Markdown (Maybe Format) Text | Markdown :|: Markdown
deriving (Eq, Show)
@@ -49,7 +54,9 @@ data Format
| Secret
| Colored {color :: FormatColor}
| Uri
| SimplexLink {linkType :: SimplexLinkType, simplexUri :: AConnectionLink, smpHosts :: NonEmpty Text}
-- showText is Nothing for the usual Uri without text
| HyperLink {showText :: Maybe Text, linkUri :: Text}
| SimplexLink {showText :: Maybe Text, linkType :: SimplexLinkType, simplexUri :: AConnectionLink, smpHosts :: NonEmpty Text}
| Command {commandStr :: Text}
| Mention {memberName :: Text}
| Email
@@ -187,6 +194,7 @@ markdownP = mconcat <$> A.many' fragmentP
'!' -> coloredP <|> wordP
'@' -> mentionP <|> wordP
'/' -> commandP <|> wordP
'[' -> sowLinkP <|> wordP
_
| isDigit c -> phoneP <|> wordP
| otherwise -> wordP
@@ -224,6 +232,20 @@ markdownP = mconcat <$> A.many' fragmentP
let origStr = if c == '\'' then '\'' `T.cons` str `T.snoc` '\'' else str
res = markdown (format str) (pfx `T.cons` origStr)
pure $ if T.null punct then res else res :|: unmarked punct
sowLinkP = do
t <- '[' `inParens` ']'
l <- '(' `inParens` ')'
let hasPunct = T.any (\c -> isPunctuation c && c /= '-' && c /= '_') t
when (hasPunct && t /= l && ("https://" <> t) /= l) $ fail "punctuation in hyperlink text"
f <- case strDecode $ encodeUtf8 l of
Right lnk@(ACL _ cLink) -> case cLink of
CLShort _ -> pure $ simplexUriFormat (Just t) lnk
CLFull _ -> fail "full SimpleX link in hyperlink"
Left _ -> case parseUri $ encodeUtf8 l of
Right _ -> pure $ HyperLink (Just t) l
Left e -> fail $ "not uri: " <> T.unpack e
pure $ markdown f $ T.concat ["[", t, "](", l, ")"]
inParens open close = A.char open *> A.takeWhile1 (/= close) <* A.char close
colorP =
A.anyChar >>= \case
'r' -> optional "ed" $> Red
@@ -253,7 +275,11 @@ markdownP = mconcat <$> A.many' fragmentP
wordMD :: Text -> Markdown
wordMD s
| T.null s = unmarked s
| isUri s' = res $ uriMarkdown s'
| isUri s' = case strDecode $ encodeUtf8 s of
Right cLink -> res $ markdown (simplexUriFormat Nothing cLink) s'
Left _ -> case parseUri $ encodeUtf8 s' of
Right _ -> res $ markdown Uri s'
Left _ -> unmarked s
| isDomain s' = res $ markdown Uri s'
| isEmail s' = res $ markdown Email s'
| otherwise = unmarked s
@@ -265,9 +291,6 @@ markdownP = mconcat <$> A.many' fragmentP
'/' -> False
')' -> False
c -> isPunctuation c
uriMarkdown s = case strDecode $ encodeUtf8 s of
Right cLink -> markdown (simplexUriFormat cLink) s
_ -> markdown Uri s
isUri s = T.length s >= 10 && any (`T.isPrefixOf` s) ["http://", "https://", "simplex:/"]
-- matches what is likely to be a domain, not all valid domain names
isDomain s = case T.splitOn "." s of
@@ -281,11 +304,11 @@ markdownP = mconcat <$> A.many' fragmentP
&& (let p c = isAscii c && isAlpha c in T.all p name && T.all p tld)
isEmail s = T.any (== '@') s && Email.isValid (encodeUtf8 s)
noFormat = pure . unmarked
simplexUriFormat :: AConnectionLink -> Format
simplexUriFormat = \case
simplexUriFormat :: Maybe Text -> AConnectionLink -> Format
simplexUriFormat showText = \case
ACL m (CLFull cReq) -> case cReq of
CRContactUri crData -> SimplexLink (linkType' crData) cLink $ uriHosts crData
CRInvitationUri crData _ -> SimplexLink XLInvitation cLink $ uriHosts crData
CRContactUri crData -> SimplexLink showText (linkType' crData) cLink $ uriHosts crData
CRInvitationUri crData _ -> SimplexLink showText XLInvitation cLink $ uriHosts crData
where
cLink = ACL m $ CLFull $ simplexConnReqUri cReq
uriHosts ConnReqUriData {crSmpQueues} = L.map strEncodeText $ sconcat $ L.map (host . qServer) crSmpQueues
@@ -293,8 +316,8 @@ markdownP = mconcat <$> A.many' fragmentP
Just (CRDataGroup _) -> XLGroup
Nothing -> XLContact
ACL m (CLShort sLnk) -> case sLnk of
CSLContact _ ct srv _ -> SimplexLink (linkType' ct) cLink $ uriHosts srv
CSLInvitation _ srv _ _ -> SimplexLink XLInvitation cLink $ uriHosts srv
CSLContact _ ct srv _ -> SimplexLink showText (linkType' ct) cLink $ uriHosts srv
CSLInvitation _ srv _ _ -> SimplexLink showText XLInvitation cLink $ uriHosts srv
where
cLink = ACL m $ CLShort $ simplexShortLink sLnk
uriHosts srv = L.map strEncodeText $ host srv
@@ -305,6 +328,24 @@ markdownP = mconcat <$> A.many' fragmentP
strEncodeText :: StrEncoding a => a -> Text
strEncodeText = safeDecodeUtf8 . strEncode
parseUri :: B.ByteString -> Either Text U.URI
parseUri s = case U.parseURI U.laxURIParserOptions s of
Left e -> Left $ "Invalid URI: " <> tshow e
Right uri@U.URI {uriScheme = U.Scheme sch, uriAuthority}
| sch /= "http" && sch /= "https" -> Left $ "Unsupported URI scheme: " <> safeDecodeUtf8 sch
| otherwise -> case uriAuthority of
Nothing -> Left "No URI host"
Just U.Authority {authorityHost = U.Host h}
| '.' `B.notElem` h -> Left $ "Invalid URI host: " <> safeDecodeUtf8 h
| otherwise -> Right uri
sanitizeUri :: U.URI -> Maybe U.URI
sanitizeUri uri@U.URI {uriQuery = U.Query originalQS} =
let sanitizedQS = filter (\(p, _) -> p == "q" || p == "search") originalQS
in if length sanitizedQS == length originalQS
then Nothing
else Just $ uri {U.uriQuery = U.Query sanitizedQS}
markdownText :: FormattedText -> Text
markdownText (FormattedText f_ t) = case f_ of
Nothing -> t
@@ -316,6 +357,7 @@ markdownText (FormattedText f_ t) = case f_ of
Secret -> around '#'
Colored (FormatColor c) -> color c
Uri -> t
HyperLink {} -> t
SimplexLink {} -> t
Mention _ -> t
Command _ -> t

View File

@@ -1,5 +1,6 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
@@ -25,6 +26,7 @@ import Data.Functor (($>))
import Data.List (find)
import qualified Data.List.NonEmpty as L
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import Data.Word (Word8)
import Foreign.C.String
import Foreign.C.Types (CInt (..))
@@ -35,7 +37,7 @@ import GHC.IO.Encoding (setFileSystemEncoding, setForeignEncoding, setLocaleEnco
import Simplex.Chat
import Simplex.Chat.Controller
import Simplex.Chat.Library.Commands
import Simplex.Chat.Markdown (ParsedMarkdown (..), parseMaybeMarkdownList)
import Simplex.Chat.Markdown (ParsedMarkdown (..), parseMaybeMarkdownList, parseUri, sanitizeUri)
import Simplex.Chat.Mobile.File
import Simplex.Chat.Mobile.Shared
import Simplex.Chat.Mobile.WebRTC
@@ -56,6 +58,7 @@ import Simplex.Messaging.Protocol (AProtoServerWithAuth (..), AProtocolType (..)
import Simplex.Messaging.Util (catchAll, liftEitherWith, safeDecodeUtf8)
import System.IO (utf8)
import System.Timeout (timeout)
import qualified URI.ByteString as U
#if !defined(dbPostgres)
import Data.ByteArray (ScrubbedBytes)
import Database.SQLite.Simple (SQLError (..))
@@ -81,6 +84,20 @@ eitherToResult :: Maybe RemoteHostId -> Either ChatError r -> APIResult r
eitherToResult rhId = either (APIError rhId) (APIResult rhId)
{-# INLINE eitherToResult #-}
data ParsedUri = ParsedUri
{ uriInfo :: Maybe UriInfo,
parseError :: Text
}
data UriInfo = UriInfo
{ scheme :: Text,
sanitized :: Maybe Text
}
$(JQ.deriveJSON defaultJSON ''UriInfo)
$(JQ.deriveJSON defaultJSON ''ParsedUri)
$(pure [])
instance ToJSON r => ToJSON (APIResult r) where
@@ -111,6 +128,8 @@ foreign export ccall "chat_parse_markdown" cChatParseMarkdown :: CString -> IO C
foreign export ccall "chat_parse_server" cChatParseServer :: CString -> IO CJSONString
foreign export ccall "chat_parse_uri" cChatParseUri :: CString -> IO CJSONString
foreign export ccall "chat_password_hash" cChatPasswordHash :: CString -> CString -> IO CString
foreign export ccall "chat_valid_name" cChatValidName :: CString -> IO CString
@@ -200,6 +219,10 @@ cChatParseMarkdown s = newCStringFromLazyBS . chatParseMarkdown =<< B.packCStrin
cChatParseServer :: CString -> IO CJSONString
cChatParseServer s = newCStringFromLazyBS . chatParseServer =<< B.packCString s
-- | parse web URI - returns ParsedUri JSON
cChatParseUri :: CString -> IO CJSONString
cChatParseUri s = newCStringFromLazyBS . chatParseUri =<< B.packCString s
cChatPasswordHash :: CString -> CString -> IO CString
cChatPasswordHash cPwd cSalt = do
pwd <- B.packCString cPwd
@@ -293,6 +316,7 @@ chatMigrateInitKey chatDbOpts keepKey confirm backgroundMode = runExceptT $ do
DB.ErrorNotADatabase -> Left $ DBMErrorNotADatabase errDbStr
_ -> dbError e
#endif
dbError :: Show e => e -> Either DBMigrationResult DBStore
dbError e = Left . DBMErrorSQL errDbStr $ show e
chatCloseStore :: ChatController -> IO String
@@ -342,6 +366,14 @@ chatParseServer = J.encode . toServerAddress . strDecode
enc :: StrEncoding a => a -> String
enc = B.unpack . strEncode
chatParseUri :: ByteString -> JSONByteString
chatParseUri s = J.encode $ case parseUri s of
Left e -> ParsedUri Nothing e
Right uri@U.URI {uriScheme = U.Scheme sch} ->
let sanitized = safeDecodeUtf8 . U.serializeURIRef' <$> sanitizeUri uri
uriInfo = UriInfo {scheme = safeDecodeUtf8 sch, sanitized}
in ParsedUri (Just uriInfo) ""
chatPasswordHash :: ByteString -> ByteString -> ByteString
chatPasswordHash pwd salt = either (const "") passwordHash salt'
where