mirror of
https://github.com/simplex-chat/simplex-chat.git
synced 2026-03-31 05:25:47 +00:00
* core: markdown for "hidden" links * update, test * api docs * chatParseUri FFI function * ios: hyperlinks, offer to open sanitized links, an option to send sanitized links (enabled by default) * update markdown * android, desktop: ditto * ios: export localizations * core: rename constructor, change Maybe semantics for web links * rename
417 lines
15 KiB
Haskell
417 lines
15 KiB
Haskell
{-# LANGUAGE DeriveAnyClass #-}
|
|
{-# LANGUAGE DuplicateRecordFields #-}
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE LambdaCase #-}
|
|
{-# LANGUAGE MultiWayIf #-}
|
|
{-# LANGUAGE NamedFieldPuns #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE TemplateHaskell #-}
|
|
{-# LANGUAGE TupleSections #-}
|
|
{-# OPTIONS_GHC -Wno-unrecognised-pragmas #-}
|
|
|
|
{-# HLINT ignore "Use newtype instead of data" #-}
|
|
|
|
module Simplex.Chat.Markdown where
|
|
|
|
import Control.Applicative (optional, (<|>))
|
|
import Control.Monad
|
|
import Data.Aeson (FromJSON, ToJSON)
|
|
import qualified Data.Aeson as J
|
|
import qualified Data.Aeson.TH as JQ
|
|
import Data.Attoparsec.Text (Parser)
|
|
import qualified Data.Attoparsec.Text as A
|
|
import qualified Data.ByteString.Char8 as B
|
|
import Data.Char (isAlpha, isAscii, isDigit, isPunctuation, isSpace)
|
|
import Data.Either (fromRight)
|
|
import Data.Functor (($>))
|
|
import Data.List (foldl', intercalate)
|
|
import Data.List.NonEmpty (NonEmpty)
|
|
import qualified Data.List.NonEmpty as L
|
|
import Data.Maybe (fromMaybe, isNothing, mapMaybe)
|
|
import Data.Semigroup (sconcat)
|
|
import Data.String
|
|
import Data.Text (Text)
|
|
import qualified Data.Text as T
|
|
import Data.Text.Encoding (encodeUtf8)
|
|
import Simplex.Chat.Types
|
|
import Simplex.Messaging.Agent.Protocol (AConnectionLink (..), ConnReqUriData (..), ConnShortLink (..), ConnectionLink (..), ConnectionRequestUri (..), ContactConnType (..), SMPQueue (..), simplexConnReqUri, simplexShortLink)
|
|
import Simplex.Messaging.Encoding.String
|
|
import Simplex.Messaging.Parsers (defaultJSON, dropPrefix, enumJSON, fstToLower, sumTypeJSON)
|
|
import Simplex.Messaging.Protocol (ProtocolServer (..))
|
|
import Simplex.Messaging.Util (decodeJSON, safeDecodeUtf8, tshow)
|
|
import System.Console.ANSI.Types
|
|
import qualified Text.Email.Validate as Email
|
|
import qualified URI.ByteString as U
|
|
|
|
data Markdown = Markdown (Maybe Format) Text | Markdown :|: Markdown
|
|
deriving (Eq, Show)
|
|
|
|
data Format
|
|
= Bold
|
|
| Italic
|
|
| StrikeThrough
|
|
| Snippet
|
|
| Secret
|
|
| Colored {color :: FormatColor}
|
|
| Uri
|
|
-- showText is Nothing for the usual Uri without text
|
|
| HyperLink {showText :: Maybe Text, linkUri :: Text}
|
|
| SimplexLink {showText :: Maybe Text, linkType :: SimplexLinkType, simplexUri :: AConnectionLink, smpHosts :: NonEmpty Text}
|
|
| Command {commandStr :: Text}
|
|
| Mention {memberName :: Text}
|
|
| Email
|
|
| Phone
|
|
| Unknown {json :: J.Value}
|
|
deriving (Eq, Show)
|
|
|
|
mentionedNames :: MarkdownList -> [Text]
|
|
mentionedNames = mapMaybe (\(FormattedText f _) -> mentionedName =<< f)
|
|
where
|
|
mentionedName = \case
|
|
Mention name -> Just name
|
|
_ -> Nothing
|
|
|
|
data SimplexLinkType = XLContact | XLInvitation | XLGroup | XLChannel
|
|
deriving (Eq, Show)
|
|
|
|
colored :: Color -> Format
|
|
colored = Colored . FormatColor
|
|
{-# INLINE colored #-}
|
|
|
|
markdown :: Format -> Text -> Markdown
|
|
markdown = Markdown . Just
|
|
{-# INLINE markdown #-}
|
|
|
|
instance Semigroup Markdown where
|
|
m <> (Markdown _ "") = m
|
|
(Markdown _ "") <> m = m
|
|
m1@(Markdown f1 s1) <> m2@(Markdown f2 s2)
|
|
| f1 == f2 = Markdown f1 $ s1 <> s2
|
|
| otherwise = m1 :|: m2
|
|
m1@(Markdown f1 s1) <> ms@(Markdown f2 s2 :|: m3)
|
|
| f1 == f2 = Markdown f1 (s1 <> s2) :|: m3
|
|
| otherwise = m1 :|: ms
|
|
ms@(m1 :|: Markdown f2 s2) <> m3@(Markdown f3 s3)
|
|
| f2 == f3 = m1 :|: Markdown f2 (s2 <> s3)
|
|
| otherwise = ms :|: m3
|
|
m1 <> m2 = m1 :|: m2
|
|
|
|
instance Monoid Markdown where mempty = unmarked ""
|
|
|
|
instance IsString Markdown where fromString = unmarked . T.pack
|
|
|
|
newtype FormatColor = FormatColor Color
|
|
deriving (Eq, Show)
|
|
|
|
instance FromJSON FormatColor where
|
|
parseJSON =
|
|
J.withText "FormatColor" $
|
|
fmap FormatColor . \case
|
|
"red" -> pure Red
|
|
"green" -> pure Green
|
|
"blue" -> pure Blue
|
|
"yellow" -> pure Yellow
|
|
"cyan" -> pure Cyan
|
|
"magenta" -> pure Magenta
|
|
"black" -> pure Black
|
|
"white" -> pure White
|
|
unexpected -> fail $ "unexpected FormatColor: " <> show unexpected
|
|
|
|
instance ToJSON FormatColor where
|
|
toJSON (FormatColor c) = case c of
|
|
Red -> "red"
|
|
Green -> "green"
|
|
Blue -> "blue"
|
|
Yellow -> "yellow"
|
|
Cyan -> "cyan"
|
|
Magenta -> "magenta"
|
|
Black -> "black"
|
|
White -> "white"
|
|
|
|
data FormattedText = FormattedText {format :: Maybe Format, text :: Text}
|
|
deriving (Eq, Show)
|
|
|
|
instance IsString FormattedText where
|
|
fromString = FormattedText Nothing . T.pack
|
|
|
|
type MarkdownList = [FormattedText]
|
|
|
|
data ParsedMarkdown = ParsedMarkdown {formattedText :: Maybe MarkdownList}
|
|
|
|
unmarked :: Text -> Markdown
|
|
unmarked = Markdown Nothing
|
|
|
|
parseMaybeMarkdownList :: Text -> Maybe MarkdownList
|
|
parseMaybeMarkdownList s = case ls of
|
|
[] -> Nothing
|
|
[l]
|
|
| T.null cmd -> Nothing
|
|
| isCmd -> Just [FormattedText (Just $ Command cmd) l]
|
|
where
|
|
(isCmd, cmd) = case T.uncons $ T.dropWhile (== ' ') l of
|
|
Just (c, rest) -> (c == '/', rest)
|
|
Nothing -> (False, "")
|
|
_
|
|
| all (isNothing . format) ml -> Nothing
|
|
| otherwise -> Just . reverse $ foldl' acc [] ml
|
|
where
|
|
ls = T.lines s
|
|
ml = intercalate ["\n"] $ map (markdownToList . parseMarkdown) ls
|
|
acc [] m = [m]
|
|
acc ms@(FormattedText f t : ms') ft@(FormattedText f' t')
|
|
| f == f' = FormattedText f (t <> t') : ms'
|
|
| otherwise = ft : ms
|
|
|
|
parseMarkdownList :: Text -> MarkdownList
|
|
parseMarkdownList = markdownToList . parseMarkdown
|
|
|
|
markdownToList :: Markdown -> MarkdownList
|
|
markdownToList (Markdown f s) = [FormattedText f s]
|
|
markdownToList (m1 :|: m2) = markdownToList m1 <> markdownToList m2
|
|
|
|
parseMarkdown :: Text -> Markdown
|
|
parseMarkdown s = fromRight (unmarked s) $ A.parseOnly (markdownP <* A.endOfInput) s
|
|
|
|
isSimplexLink :: Format -> Bool
|
|
isSimplexLink = \case
|
|
SimplexLink {} -> True
|
|
_ -> False
|
|
|
|
markdownP :: Parser Markdown
|
|
markdownP = mconcat <$> A.many' fragmentP
|
|
where
|
|
fragmentP :: Parser Markdown
|
|
fragmentP =
|
|
A.peekChar >>= \case
|
|
Just c -> case c of
|
|
' ' -> unmarked <$> A.takeWhile (== ' ')
|
|
'+' -> phoneP <|> wordP
|
|
'*' -> formattedP '*' Bold
|
|
'_' -> formattedP '_' Italic
|
|
'~' -> formattedP '~' StrikeThrough
|
|
'`' -> formattedP '`' Snippet
|
|
'#' -> A.char '#' *> secretP
|
|
'!' -> coloredP <|> wordP
|
|
'@' -> mentionP <|> wordP
|
|
'/' -> commandP <|> wordP
|
|
'[' -> sowLinkP <|> wordP
|
|
_
|
|
| isDigit c -> phoneP <|> wordP
|
|
| otherwise -> wordP
|
|
Nothing -> fail ""
|
|
formattedP :: Char -> Format -> Parser Markdown
|
|
formattedP c f = do
|
|
s <- A.char c *> A.takeTill (== c)
|
|
(A.char c $> md c f s) <|> noFormat (c `T.cons` s)
|
|
md :: Char -> Format -> Text -> Markdown
|
|
md c f s
|
|
| T.null s || T.head s == ' ' || T.last s == ' ' =
|
|
unmarked $ c `T.cons` s `T.snoc` c
|
|
| otherwise = markdown f s
|
|
secretP :: Parser Markdown
|
|
secretP = secret <$> A.takeWhile (== '#') <*> A.takeTill (== '#') <*> A.takeWhile (== '#')
|
|
secret :: Text -> Text -> Text -> Markdown
|
|
secret b s a
|
|
| T.null a || T.null s || T.head s == ' ' || T.last s == ' ' =
|
|
unmarked $ '#' `T.cons` ss
|
|
| otherwise = markdown Secret $ T.init ss
|
|
where
|
|
ss = b <> s <> a
|
|
coloredP :: Parser Markdown
|
|
coloredP = do
|
|
clr <- A.char '!' *> colorP <* A.space
|
|
s <- ((<>) <$> A.takeWhile1 (\c -> c /= ' ' && c /= '!') <*> A.takeTill (== '!')) <* A.char '!'
|
|
if T.null s || T.last s == ' '
|
|
then fail "not colored"
|
|
else pure $ markdown (colored clr) s
|
|
mentionP = prefixedStringP '@' displayNameTextP_ Mention
|
|
commandP = prefixedStringP '/' commandTextP Command
|
|
prefixedStringP pfx parser format = do
|
|
c <- A.char pfx *> A.peekChar'
|
|
(str, punct) <- parser
|
|
let origStr = if c == '\'' then '\'' `T.cons` str `T.snoc` '\'' else str
|
|
res = markdown (format str) (pfx `T.cons` origStr)
|
|
pure $ if T.null punct then res else res :|: unmarked punct
|
|
sowLinkP = do
|
|
t <- '[' `inParens` ']'
|
|
l <- '(' `inParens` ')'
|
|
let hasPunct = T.any (\c -> isPunctuation c && c /= '-' && c /= '_') t
|
|
when (hasPunct && t /= l && ("https://" <> t) /= l) $ fail "punctuation in hyperlink text"
|
|
f <- case strDecode $ encodeUtf8 l of
|
|
Right lnk@(ACL _ cLink) -> case cLink of
|
|
CLShort _ -> pure $ simplexUriFormat (Just t) lnk
|
|
CLFull _ -> fail "full SimpleX link in hyperlink"
|
|
Left _ -> case parseUri $ encodeUtf8 l of
|
|
Right _ -> pure $ HyperLink (Just t) l
|
|
Left e -> fail $ "not uri: " <> T.unpack e
|
|
pure $ markdown f $ T.concat ["[", t, "](", l, ")"]
|
|
inParens open close = A.char open *> A.takeWhile1 (/= close) <* A.char close
|
|
colorP =
|
|
A.anyChar >>= \case
|
|
'r' -> optional "ed" $> Red
|
|
'g' -> optional "reen" $> Green
|
|
'b' -> optional "lue" $> Blue
|
|
'y' -> optional "ellow" $> Yellow
|
|
'c' -> optional "yan" $> Cyan
|
|
'm' -> optional "agenta" $> Magenta
|
|
'1' -> pure Red
|
|
'2' -> pure Green
|
|
'3' -> pure Blue
|
|
'4' -> pure Yellow
|
|
'5' -> pure Cyan
|
|
'6' -> pure Magenta
|
|
_ -> fail "not color"
|
|
phoneP = do
|
|
country <- optional $ T.cons <$> A.char '+' <*> A.takeWhile1 isDigit
|
|
code <- optional $ conc4 <$> phoneSep <*> "(" <*> A.takeWhile1 isDigit <*> ")"
|
|
segments <- mconcat <$> A.many' ((<>) <$> phoneSep <*> A.takeWhile1 isDigit)
|
|
let s = fromMaybe "" country <> fromMaybe "" code <> segments
|
|
len = T.length s
|
|
if 7 <= len && len <= 22 then pure $ markdown Phone s else fail "not phone"
|
|
conc4 s1 s2 s3 s4 = s1 <> s2 <> s3 <> s4
|
|
phoneSep = " " <|> "-" <|> "." <|> ""
|
|
wordP :: Parser Markdown
|
|
wordP = wordMD <$> A.takeTill (== ' ')
|
|
wordMD :: Text -> Markdown
|
|
wordMD s
|
|
| T.null s = unmarked s
|
|
| isUri s' = case strDecode $ encodeUtf8 s of
|
|
Right cLink -> res $ markdown (simplexUriFormat Nothing cLink) s'
|
|
Left _ -> case parseUri $ encodeUtf8 s' of
|
|
Right _ -> res $ markdown Uri s'
|
|
Left _ -> unmarked s
|
|
| isDomain s' = res $ markdown Uri s'
|
|
| isEmail s' = res $ markdown Email s'
|
|
| otherwise = unmarked s
|
|
where
|
|
punct = T.takeWhileEnd isPunctuation' s
|
|
s' = T.dropWhileEnd isPunctuation' s
|
|
res md' = if T.null punct then md' else md' :|: unmarked punct
|
|
isPunctuation' = \case
|
|
'/' -> False
|
|
')' -> False
|
|
c -> isPunctuation c
|
|
isUri s = T.length s >= 10 && any (`T.isPrefixOf` s) ["http://", "https://", "simplex:/"]
|
|
-- matches what is likely to be a domain, not all valid domain names
|
|
isDomain s = case T.splitOn "." s of
|
|
[name, tld] -> isDomain_ name tld
|
|
[sub, name, tld] -> T.length sub >= 3 && T.length sub <= 8 && isDomain_ name tld
|
|
_ -> False
|
|
where
|
|
isDomain_ name tld =
|
|
(let n = T.length name in n >= 1 && n <= 24)
|
|
&& (let n = T.length tld in n >= 2 && n <= 8)
|
|
&& (let p c = isAscii c && isAlpha c in T.all p name && T.all p tld)
|
|
isEmail s = T.any (== '@') s && Email.isValid (encodeUtf8 s)
|
|
noFormat = pure . unmarked
|
|
simplexUriFormat :: Maybe Text -> AConnectionLink -> Format
|
|
simplexUriFormat showText = \case
|
|
ACL m (CLFull cReq) -> case cReq of
|
|
CRContactUri crData -> SimplexLink showText (linkType' crData) cLink $ uriHosts crData
|
|
CRInvitationUri crData _ -> SimplexLink showText XLInvitation cLink $ uriHosts crData
|
|
where
|
|
cLink = ACL m $ CLFull $ simplexConnReqUri cReq
|
|
uriHosts ConnReqUriData {crSmpQueues} = L.map strEncodeText $ sconcat $ L.map (host . qServer) crSmpQueues
|
|
linkType' ConnReqUriData {crClientData} = case crClientData >>= decodeJSON of
|
|
Just (CRDataGroup _) -> XLGroup
|
|
Nothing -> XLContact
|
|
ACL m (CLShort sLnk) -> case sLnk of
|
|
CSLContact _ ct srv _ -> SimplexLink showText (linkType' ct) cLink $ uriHosts srv
|
|
CSLInvitation _ srv _ _ -> SimplexLink showText XLInvitation cLink $ uriHosts srv
|
|
where
|
|
cLink = ACL m $ CLShort $ simplexShortLink sLnk
|
|
uriHosts srv = L.map strEncodeText $ host srv
|
|
linkType' = \case
|
|
CCTGroup -> XLGroup
|
|
CCTChannel -> XLChannel
|
|
CCTContact -> XLContact
|
|
strEncodeText :: StrEncoding a => a -> Text
|
|
strEncodeText = safeDecodeUtf8 . strEncode
|
|
|
|
parseUri :: B.ByteString -> Either Text U.URI
|
|
parseUri s = case U.parseURI U.laxURIParserOptions s of
|
|
Left e -> Left $ "Invalid URI: " <> tshow e
|
|
Right uri@U.URI {uriScheme = U.Scheme sch, uriAuthority}
|
|
| sch /= "http" && sch /= "https" -> Left $ "Unsupported URI scheme: " <> safeDecodeUtf8 sch
|
|
| otherwise -> case uriAuthority of
|
|
Nothing -> Left "No URI host"
|
|
Just U.Authority {authorityHost = U.Host h}
|
|
| '.' `B.notElem` h -> Left $ "Invalid URI host: " <> safeDecodeUtf8 h
|
|
| otherwise -> Right uri
|
|
|
|
sanitizeUri :: U.URI -> Maybe U.URI
|
|
sanitizeUri uri@U.URI {uriQuery = U.Query originalQS} =
|
|
let sanitizedQS = filter (\(p, _) -> p == "q" || p == "search") originalQS
|
|
in if length sanitizedQS == length originalQS
|
|
then Nothing
|
|
else Just $ uri {U.uriQuery = U.Query sanitizedQS}
|
|
|
|
markdownText :: FormattedText -> Text
|
|
markdownText (FormattedText f_ t) = case f_ of
|
|
Nothing -> t
|
|
Just f -> case f of
|
|
Bold -> around '*'
|
|
Italic -> around '_'
|
|
StrikeThrough -> around '~'
|
|
Snippet -> around '`'
|
|
Secret -> around '#'
|
|
Colored (FormatColor c) -> color c
|
|
Uri -> t
|
|
HyperLink {} -> t
|
|
SimplexLink {} -> t
|
|
Mention _ -> t
|
|
Command _ -> t
|
|
Email -> t
|
|
Phone -> t
|
|
Unknown _ -> t
|
|
where
|
|
around c = c `T.cons` t `T.snoc` c
|
|
color c = case colorStr c of
|
|
Just cStr -> cStr <> t `T.snoc` '!'
|
|
Nothing -> t
|
|
colorStr = \case
|
|
Red -> Just "!1 "
|
|
Green -> Just "!2 "
|
|
Blue -> Just "!3 "
|
|
Yellow -> Just "!4 "
|
|
Cyan -> Just "!5 "
|
|
Magenta -> Just "!6 "
|
|
Black -> Nothing
|
|
White -> Nothing
|
|
|
|
displayNameTextP :: Parser Text
|
|
displayNameTextP = displayNameTextP_ >>= \(t, sfx) -> if T.null sfx then pure t else fail "Name ends with punctuation"
|
|
{-# INLINE displayNameTextP #-}
|
|
|
|
displayNameTextP_ :: Parser (Text, Text)
|
|
displayNameTextP_ = (,"") <$> quoted '\'' <|> splitPunctuation <$> takeNameTill isSpace
|
|
where
|
|
takeNameTill p =
|
|
A.peekChar' >>= \c ->
|
|
if refChar c then A.takeTill p else fail "invalid first character in display name"
|
|
splitPunctuation s = (T.dropWhileEnd isPunctuation s, T.takeWhileEnd isPunctuation s)
|
|
quoted c = A.char c *> takeNameTill (== c) <* A.char c
|
|
refChar c = c > ' ' && c /= '#' && c /= '@' && c /= '\''
|
|
|
|
commandTextP :: Parser (Text, Text)
|
|
commandTextP = do
|
|
(cmd, punct) <- displayNameTextP_
|
|
case T.words cmd of
|
|
(keyword : _) | T.all (\c -> isAlpha c || isDigit c || c == '_') keyword -> pure (cmd, punct)
|
|
_ -> fail "invalid command keyword"
|
|
|
|
-- quotes names that contain spaces or end on punctuation
|
|
viewName :: Text -> Text
|
|
viewName s = if T.any isSpace s || maybe False (isPunctuation . snd) (T.unsnoc s) then "'" <> s <> "'" else s
|
|
|
|
$(JQ.deriveJSON (enumJSON $ dropPrefix "XL") ''SimplexLinkType)
|
|
|
|
$(JQ.deriveToJSON (sumTypeJSON fstToLower) ''Format)
|
|
|
|
instance FromJSON Format where
|
|
parseJSON v = $(JQ.mkParseJSON (sumTypeJSON fstToLower) ''Format) v <|> pure (Unknown v)
|
|
|
|
$(JQ.deriveJSON defaultJSON ''FormattedText)
|
|
|
|
$(JQ.deriveToJSON defaultJSON ''ParsedMarkdown)
|