mirror of
https://github.com/simplex-chat/simplex-chat.git
synced 2026-03-30 18:35:49 +00:00
core: improve markdown parsing and recognise URIs (#352)
This commit is contained in:
committed by
GitHub
parent
efa22715d5
commit
48dbd079cf
@@ -32,6 +32,7 @@ data Format
|
||||
| Snippet
|
||||
| Secret
|
||||
| Colored FormatColor
|
||||
| Uri
|
||||
deriving (Eq, Show, Generic)
|
||||
|
||||
colored :: Color -> Format
|
||||
@@ -42,7 +43,19 @@ markdown = Markdown . Just
|
||||
|
||||
instance ToJSON Format where toEncoding = J.genericToEncoding $ sumTypeJSON fstToLower
|
||||
|
||||
instance Semigroup Markdown where (<>) = (:|:)
|
||||
instance Semigroup Markdown where
|
||||
m <> (Markdown _ "") = m
|
||||
(Markdown _ "") <> m = m
|
||||
m1@(Markdown f1 s1) <> m2@(Markdown f2 s2)
|
||||
| f1 == f2 = Markdown f1 $ s1 <> s2
|
||||
| otherwise = m1 :|: m2
|
||||
m1@(Markdown f1 s1) <> ms@(Markdown f2 s2 :|: m3)
|
||||
| f1 == f2 = Markdown f1 (s1 <> s2) :|: m3
|
||||
| otherwise = m1 :|: ms
|
||||
ms@(m1 :|: Markdown f2 s2) <> m3@(Markdown f3 s3)
|
||||
| f2 == f3 = m1 :|: Markdown f2 (s2 <> s3)
|
||||
| otherwise = ms :|: m3
|
||||
m1 <> m2 = m1 :|: m2
|
||||
|
||||
instance Monoid Markdown where mempty = unmarked ""
|
||||
|
||||
@@ -125,20 +138,18 @@ parseMarkdown :: Text -> Markdown
|
||||
parseMarkdown s = fromRight (unmarked s) $ A.parseOnly (markdownP <* A.endOfInput) s
|
||||
|
||||
markdownP :: Parser Markdown
|
||||
markdownP = merge <$> A.many' fragmentP
|
||||
markdownP = mconcat <$> A.many' fragmentP
|
||||
where
|
||||
merge :: [Markdown] -> Markdown
|
||||
merge [] = ""
|
||||
merge fs = foldr1 (:|:) fs
|
||||
fragmentP :: Parser Markdown
|
||||
fragmentP =
|
||||
A.anyChar >>= \case
|
||||
' ' -> unmarked . T.cons ' ' <$> A.takeWhile (== ' ')
|
||||
c -> case M.lookup c formats of
|
||||
Just Secret -> secretP
|
||||
Just (Colored (FormatColor White)) -> coloredP
|
||||
Just f -> formattedP c "" f
|
||||
Nothing -> unformattedP c
|
||||
A.peekChar >>= \case
|
||||
Just ' ' -> unmarked <$> A.takeWhile (== ' ')
|
||||
Just c -> case M.lookup c formats of
|
||||
Just Secret -> A.char secretMD *> secretP
|
||||
Just (Colored (FormatColor White)) -> A.char colorMD *> coloredP
|
||||
Just f -> A.char c *> formattedP c "" f
|
||||
Nothing -> wordsP
|
||||
Nothing -> fail ""
|
||||
formattedP :: Char -> Text -> Format -> Parser Markdown
|
||||
formattedP c p f = do
|
||||
s <- A.takeTill (== c)
|
||||
@@ -166,15 +177,18 @@ markdownP = merge <$> A.many' fragmentP
|
||||
in (A.char ' ' *> formattedP colorMD (color `T.snoc` ' ') f)
|
||||
<|> noFormat (colorMD `T.cons` color)
|
||||
_ -> noFormat (colorMD `T.cons` color)
|
||||
unformattedP :: Char -> Parser Markdown
|
||||
unformattedP c = unmarked . T.cons c <$> wordsP
|
||||
wordsP :: Parser Text
|
||||
wordsP :: Parser Markdown
|
||||
wordsP = do
|
||||
s <- (<>) <$> A.takeTill (== ' ') <*> A.takeWhile (== ' ')
|
||||
word <- wordMD <$> A.takeTill (== ' ')
|
||||
s <- (word <>) <$> (unmarked <$> A.takeWhile (== ' '))
|
||||
A.peekChar >>= \case
|
||||
Nothing -> pure s
|
||||
Just c -> case M.lookup c formats of
|
||||
Just _ -> pure s
|
||||
Nothing -> (s <>) <$> wordsP
|
||||
wordMD :: Text -> Markdown
|
||||
wordMD s
|
||||
| "http://" `T.isPrefixOf` s || "https://" `T.isPrefixOf` s || "simplex:/" `T.isPrefixOf` s = markdown Uri s
|
||||
| otherwise = unmarked s
|
||||
noFormat :: Text -> Parser Markdown
|
||||
noFormat = pure . unmarked
|
||||
|
||||
@@ -74,7 +74,7 @@ sgr = \case
|
||||
StrikeThrough -> [SetSwapForegroundBackground True]
|
||||
Colored (FormatColor c) -> [SetColor Foreground Vivid c]
|
||||
Secret -> [SetColor Foreground Dull Black, SetColor Background Dull Black]
|
||||
Snippet -> []
|
||||
_ -> []
|
||||
|
||||
unStyle :: StyledString -> String
|
||||
unStyle (Styled _ s) = s
|
||||
|
||||
@@ -13,40 +13,41 @@ markdownTests = do
|
||||
textFormat
|
||||
secretText
|
||||
textColor
|
||||
textWithUri
|
||||
|
||||
textFormat :: Spec
|
||||
textFormat = describe "text format (bold)" do
|
||||
it "correct markdown" do
|
||||
parseMarkdown "this is *bold formatted* text"
|
||||
`shouldBe` "this is " <> markdown Bold "bold formatted" <> " " <> "text"
|
||||
`shouldBe` "this is " <> markdown Bold "bold formatted" <> " text"
|
||||
parseMarkdown "*bold formatted* text"
|
||||
`shouldBe` markdown Bold "bold formatted" <> " " <> "text"
|
||||
`shouldBe` markdown Bold "bold formatted" <> " text"
|
||||
parseMarkdown "this is *bold*"
|
||||
`shouldBe` "this is " <> markdown Bold "bold"
|
||||
parseMarkdown " *bold* text"
|
||||
`shouldBe` " " <> markdown Bold "bold" <> " " <> "text"
|
||||
`shouldBe` " " <> markdown Bold "bold" <> " text"
|
||||
parseMarkdown " *bold* text"
|
||||
`shouldBe` " " <> markdown Bold "bold" <> " " <> "text"
|
||||
`shouldBe` " " <> markdown Bold "bold" <> " text"
|
||||
parseMarkdown "this is *bold* "
|
||||
`shouldBe` "this is " <> markdown Bold "bold" <> " "
|
||||
parseMarkdown "this is *bold* "
|
||||
`shouldBe` "this is " <> markdown Bold "bold" <> " "
|
||||
it "ignored as markdown" do
|
||||
parseMarkdown "this is * unformatted * text"
|
||||
`shouldBe` "this is " <> "* unformatted *" <> " " <> "text"
|
||||
`shouldBe` "this is * unformatted * text"
|
||||
parseMarkdown "this is *unformatted * text"
|
||||
`shouldBe` "this is " <> "*unformatted *" <> " " <> "text"
|
||||
`shouldBe` "this is *unformatted * text"
|
||||
parseMarkdown "this is * unformatted* text"
|
||||
`shouldBe` "this is " <> "* unformatted*" <> " " <> "text"
|
||||
`shouldBe` "this is * unformatted* text"
|
||||
parseMarkdown "this is **unformatted** text"
|
||||
`shouldBe` "this is " <> "**" <> "unformatted** text"
|
||||
`shouldBe` "this is **unformatted** text"
|
||||
parseMarkdown "this is*unformatted* text"
|
||||
`shouldBe` "this is*unformatted* text"
|
||||
parseMarkdown "this is *unformatted text"
|
||||
`shouldBe` "this is " <> "*unformatted text"
|
||||
`shouldBe` "this is *unformatted text"
|
||||
it "ignored internal markdown" do
|
||||
parseMarkdown "this is *long _bold_ (not italic)* text"
|
||||
`shouldBe` "this is " <> markdown Bold "long _bold_ (not italic)" <> " " <> "text"
|
||||
`shouldBe` "this is " <> markdown Bold "long _bold_ (not italic)" <> " text"
|
||||
parseMarkdown "snippet: `this is *bold text*`"
|
||||
`shouldBe` "snippet: " <> markdown Snippet "this is *bold text*"
|
||||
|
||||
@@ -54,36 +55,36 @@ secretText :: Spec
|
||||
secretText = describe "secret text" do
|
||||
it "correct markdown" do
|
||||
parseMarkdown "this is #black_secret# text"
|
||||
`shouldBe` "this is " <> markdown Secret "black_secret" <> " " <> "text"
|
||||
`shouldBe` "this is " <> markdown Secret "black_secret" <> " text"
|
||||
parseMarkdown "##black_secret### text"
|
||||
`shouldBe` markdown Secret "#black_secret##" <> " " <> "text"
|
||||
`shouldBe` markdown Secret "#black_secret##" <> " text"
|
||||
parseMarkdown "this is #black secret# text"
|
||||
`shouldBe` "this is " <> markdown Secret "black secret" <> " " <> "text"
|
||||
`shouldBe` "this is " <> markdown Secret "black secret" <> " text"
|
||||
parseMarkdown "##black secret### text"
|
||||
`shouldBe` markdown Secret "#black secret##" <> " " <> "text"
|
||||
`shouldBe` markdown Secret "#black secret##" <> " text"
|
||||
parseMarkdown "this is #secret#"
|
||||
`shouldBe` "this is " <> markdown Secret "secret"
|
||||
parseMarkdown " #secret# text"
|
||||
`shouldBe` " " <> markdown Secret "secret" <> " " <> "text"
|
||||
`shouldBe` " " <> markdown Secret "secret" <> " text"
|
||||
parseMarkdown " #secret# text"
|
||||
`shouldBe` " " <> markdown Secret "secret" <> " " <> "text"
|
||||
`shouldBe` " " <> markdown Secret "secret" <> " text"
|
||||
parseMarkdown "this is #secret# "
|
||||
`shouldBe` "this is " <> markdown Secret "secret" <> " "
|
||||
parseMarkdown "this is #secret# "
|
||||
`shouldBe` "this is " <> markdown Secret "secret" <> " "
|
||||
it "ignored as markdown" do
|
||||
parseMarkdown "this is # unformatted # text"
|
||||
`shouldBe` "this is " <> "# unformatted #" <> " " <> "text"
|
||||
`shouldBe` "this is # unformatted # text"
|
||||
parseMarkdown "this is #unformatted # text"
|
||||
`shouldBe` "this is " <> "#unformatted #" <> " " <> "text"
|
||||
`shouldBe` "this is #unformatted # text"
|
||||
parseMarkdown "this is # unformatted# text"
|
||||
`shouldBe` "this is " <> "# unformatted#" <> " " <> "text"
|
||||
`shouldBe` "this is # unformatted# text"
|
||||
parseMarkdown "this is ## unformatted ## text"
|
||||
`shouldBe` "this is " <> "## unformatted ##" <> " " <> "text"
|
||||
`shouldBe` "this is ## unformatted ## text"
|
||||
parseMarkdown "this is#unformatted# text"
|
||||
`shouldBe` "this is#unformatted# text"
|
||||
parseMarkdown "this is #unformatted text"
|
||||
`shouldBe` "this is " <> "#unformatted text"
|
||||
`shouldBe` "this is #unformatted text"
|
||||
it "ignored internal markdown" do
|
||||
parseMarkdown "snippet: `this is #secret_text#`"
|
||||
`shouldBe` "snippet: " <> markdown Snippet "this is #secret_text#"
|
||||
@@ -95,34 +96,48 @@ textColor :: Spec
|
||||
textColor = describe "text color (red)" do
|
||||
it "correct markdown" do
|
||||
parseMarkdown "this is !1 red color! text"
|
||||
`shouldBe` "this is " <> red "red color" <> " " <> "text"
|
||||
`shouldBe` "this is " <> red "red color" <> " text"
|
||||
parseMarkdown "!1 red! text"
|
||||
`shouldBe` red "red" <> " " <> "text"
|
||||
`shouldBe` red "red" <> " text"
|
||||
parseMarkdown "this is !1 red!"
|
||||
`shouldBe` "this is " <> red "red"
|
||||
parseMarkdown " !1 red! text"
|
||||
`shouldBe` " " <> red "red" <> " " <> "text"
|
||||
`shouldBe` " " <> red "red" <> " text"
|
||||
parseMarkdown " !1 red! text"
|
||||
`shouldBe` " " <> red "red" <> " " <> "text"
|
||||
`shouldBe` " " <> red "red" <> " text"
|
||||
parseMarkdown "this is !1 red! "
|
||||
`shouldBe` "this is " <> red "red" <> " "
|
||||
parseMarkdown "this is !1 red! "
|
||||
`shouldBe` "this is " <> red "red" <> " "
|
||||
it "ignored as markdown" do
|
||||
parseMarkdown "this is !1 unformatted ! text"
|
||||
`shouldBe` "this is " <> "!1 unformatted !" <> " " <> "text"
|
||||
`shouldBe` "this is !1 unformatted ! text"
|
||||
parseMarkdown "this is !1 unformatted ! text"
|
||||
`shouldBe` "this is " <> "!1 unformatted !" <> " " <> "text"
|
||||
`shouldBe` "this is !1 unformatted ! text"
|
||||
parseMarkdown "this is !1 unformatted! text"
|
||||
`shouldBe` "this is " <> "!1 unformatted!" <> " " <> "text"
|
||||
`shouldBe` "this is !1 unformatted! text"
|
||||
-- parseMarkdown "this is !!1 unformatted!! text"
|
||||
-- `shouldBe` "this is " <> "!!1" <> "unformatted!! text"
|
||||
parseMarkdown "this is!1 unformatted! text"
|
||||
`shouldBe` "this is!1 unformatted! text"
|
||||
parseMarkdown "this is !1 unformatted text"
|
||||
`shouldBe` "this is " <> "!1 unformatted text"
|
||||
`shouldBe` "this is !1 unformatted text"
|
||||
it "ignored internal markdown" do
|
||||
parseMarkdown "this is !1 long *red* (not bold)! text"
|
||||
`shouldBe` "this is " <> red "long *red* (not bold)" <> " " <> "text"
|
||||
`shouldBe` "this is " <> red "long *red* (not bold)" <> " text"
|
||||
parseMarkdown "snippet: `this is !1 red text!`"
|
||||
`shouldBe` "snippet: " <> markdown Snippet "this is !1 red text!"
|
||||
|
||||
uri :: Text -> Markdown
|
||||
uri = Markdown $ Just Uri
|
||||
|
||||
textWithUri :: Spec
|
||||
textWithUri = describe "text with Uri" do
|
||||
it "correct markdown" do
|
||||
parseMarkdown "https://simplex.chat" `shouldBe` uri "https://simplex.chat"
|
||||
parseMarkdown "http://simplex.chat" `shouldBe` uri "http://simplex.chat"
|
||||
parseMarkdown "this is https://simplex.chat" `shouldBe` "this is " <> uri "https://simplex.chat"
|
||||
parseMarkdown "https://simplex.chat site" `shouldBe` uri "https://simplex.chat" <> " site"
|
||||
it "ignored as markdown" do
|
||||
parseMarkdown "_https://simplex.chat" `shouldBe` "_https://simplex.chat"
|
||||
parseMarkdown "this is _https://simplex.chat" `shouldBe` "this is _https://simplex.chat"
|
||||
|
||||
Reference in New Issue
Block a user