mirror of
https://github.com/simplex-chat/simplexmq.git
synced 2026-06-04 10:51:27 +00:00
ntf server: better batching and logging (#780)
* ntf server: better batching and logging * reduce batch delay for ntf server * comments * 5.1.3, ntf 1.4.2 * more logging * more logging * split large batches, more logging * remove some logs
This commit is contained in:
committed by
GitHub
parent
3a74558e84
commit
4a927d1ae2
@@ -429,7 +429,7 @@ data NtfSubStatus
|
||||
NSAuth
|
||||
| -- | SMP error other than AUTH
|
||||
NSErr ByteString
|
||||
deriving (Eq, Show)
|
||||
deriving (Eq, Ord, Show)
|
||||
|
||||
ntfShouldSubscribe :: NtfSubStatus -> Bool
|
||||
ntfShouldSubscribe = \case
|
||||
|
||||
@@ -16,16 +16,17 @@ import Control.Concurrent.STM (stateTVar)
|
||||
import Control.Logger.Simple
|
||||
import Control.Monad.Except
|
||||
import Control.Monad.Reader
|
||||
import Data.Bifunctor (second)
|
||||
import Data.ByteString.Char8 (ByteString)
|
||||
import qualified Data.ByteString.Char8 as B
|
||||
import Data.Function (on)
|
||||
import Data.Functor (($>))
|
||||
import Data.Int (Int64)
|
||||
import Data.List (intercalate)
|
||||
import Data.List (intercalate, sort)
|
||||
import Data.List.NonEmpty (NonEmpty(..))
|
||||
import qualified Data.List.NonEmpty as L
|
||||
import Data.Map.Strict (Map)
|
||||
import qualified Data.Map.Strict as M
|
||||
import Data.Maybe (catMaybes)
|
||||
import qualified Data.Text as T
|
||||
import Data.Text.Encoding (decodeLatin1)
|
||||
import Data.Time.Clock (UTCTime (..), diffTimeToPicoseconds, getCurrentTime)
|
||||
@@ -55,11 +56,10 @@ import System.Exit (exitFailure)
|
||||
import System.IO (BufferMode (..), hPutStrLn, hSetBuffering)
|
||||
import System.Mem.Weak (deRefWeak)
|
||||
import UnliftIO (IOMode (..), async, uninterruptibleCancel, withFile)
|
||||
import UnliftIO.Concurrent (forkIO, killThread, mkWeakThreadId, threadDelay)
|
||||
import UnliftIO.Concurrent (forkIO, killThread, mkWeakThreadId)
|
||||
import UnliftIO.Directory (doesFileExist, renameFile)
|
||||
import UnliftIO.Exception
|
||||
import UnliftIO.STM
|
||||
import Data.Bifunctor (second)
|
||||
|
||||
runNtfServer :: NtfServerConfig -> IO ()
|
||||
runNtfServer cfg = do
|
||||
@@ -147,23 +147,28 @@ ntfServer cfg@NtfServerConfig {transports, logTLSErrors} started = do
|
||||
resubscribe :: NtfSubscriber -> Map NtfSubscriptionId NtfSubData -> M ()
|
||||
resubscribe NtfSubscriber {newSubQ} subs = do
|
||||
subs' <- atomically $ filterM (fmap ntfShouldSubscribe . readTVar . subStatus) $ M.elems subs
|
||||
mapM_ (atomically . writeTBQueue newSubQ . L.map NtfSub) $ L.nonEmpty subs'
|
||||
liftIO $ logInfo "SMP connections resubscribed"
|
||||
atomically . writeTBQueue newSubQ $ map NtfSub subs'
|
||||
liftIO $ logInfo $ "SMP resubscriptions queued (" <> tshow (length subs') <> " subscriptions)"
|
||||
|
||||
ntfSubscriber :: NtfSubscriber -> M ()
|
||||
ntfSubscriber NtfSubscriber {smpSubscribers, newSubQ, smpAgent = ca@SMPClientAgent {msgQ, agentQ}} = do
|
||||
raceAny_ [subscribe, receiveSMP, receiveAgent]
|
||||
where
|
||||
subscribe :: M ()
|
||||
subscribe = do
|
||||
d <- asks $ resubscribeDelay . config
|
||||
forever $ do
|
||||
subs <- atomically (readTBQueue newSubQ)
|
||||
let ss = L.groupBy ((==) `on` server) subs
|
||||
forM_ ss $ \serverSubs -> do
|
||||
SMPSubscriber {newSubQ = subscriberSubQ} <- getSMPSubscriber $ server $ L.head serverSubs
|
||||
atomically $ writeTQueue subscriberSubQ serverSubs
|
||||
when (length serverSubs > 10) $ threadDelay d
|
||||
subscribe = forever $ do
|
||||
subs <- atomically (readTBQueue newSubQ)
|
||||
let ss = L.groupAllWith server subs
|
||||
forM_ ss $ \serverSubs -> do
|
||||
let srv = server $ L.head serverSubs
|
||||
batches = toChunks 900 $ L.toList serverSubs
|
||||
SMPSubscriber {newSubQ = subscriberSubQ} <- getSMPSubscriber srv
|
||||
mapM_ (atomically . writeTQueue subscriberSubQ) batches
|
||||
|
||||
toChunks :: Int -> [a] -> [NonEmpty a]
|
||||
toChunks _ [] = []
|
||||
toChunks n xs =
|
||||
let (ys, xs') = splitAt n xs
|
||||
in maybe id (:) (L.nonEmpty ys) (toChunks n xs')
|
||||
|
||||
server :: NtfEntityRec 'Subscription -> SMPServer
|
||||
server (NtfSub sub) = ntfSubServer sub
|
||||
@@ -184,21 +189,26 @@ ntfSubscriber NtfSubscriber {smpSubscribers, newSubQ, smpAgent = ca@SMPClientAge
|
||||
forever $ do
|
||||
subs <- atomically (peekTQueue subscriberSubQ)
|
||||
let subs' = L.map (\(NtfSub sub) -> sub) subs
|
||||
srv = server $ L.head subs
|
||||
logSubStatus srv "subscribing" $ length subs
|
||||
mapM_ (\NtfSubData {smpQueue} -> updateSubStatus smpQueue NSPending) subs'
|
||||
rs <- liftIO $ subscribeQueues (server $ L.head subs) subs'
|
||||
subs_ <- L.nonEmpty <$> foldM process [] rs
|
||||
rs <- liftIO $ subscribeQueues srv subs'
|
||||
(subs'', oks, errs) <- foldM process ([], 0, []) rs
|
||||
atomically $ do
|
||||
void $ readTQueue subscriberSubQ
|
||||
mapM_ (writeTQueue subscriberSubQ . L.map NtfSub) subs_
|
||||
mapM_ (writeTQueue subscriberSubQ . L.map NtfSub) $ L.nonEmpty subs''
|
||||
logSubStatus srv "retrying" $ length subs''
|
||||
logSubStatus srv "subscribed" oks
|
||||
logSubErrors srv errs
|
||||
where
|
||||
process subs (sub@NtfSubData {smpQueue}, r) = case r of
|
||||
Right _ -> updateSubStatus smpQueue NSActive $> subs
|
||||
Left err -> do
|
||||
handleSubError smpQueue err
|
||||
pure $ case err of
|
||||
PCEResponseTimeout -> sub : subs
|
||||
PCENetworkError -> sub : subs
|
||||
_ -> subs
|
||||
process :: ([NtfSubData], Int, [NtfSubStatus]) -> (NtfSubData, Either SMPClientError ()) -> M ([NtfSubData], Int, [NtfSubStatus])
|
||||
process (subs, oks, errs) (sub@NtfSubData {smpQueue}, r) = case r of
|
||||
Right _ -> updateSubStatus smpQueue NSActive $> (subs, oks + 1, errs)
|
||||
Left e -> update <$> handleSubError smpQueue e
|
||||
where
|
||||
update = \case
|
||||
Just err -> (subs, oks, err : errs) -- permanent error, log and don't retry subscription
|
||||
Nothing -> (sub : subs, oks, errs) -- temporary error, retry subscription
|
||||
|
||||
-- | Subscribe to queues. The list of results can have a different order.
|
||||
subscribeQueues :: SMPServer -> NonEmpty NtfSubData -> IO (NonEmpty (NtfSubData, Either SMPClientError ()))
|
||||
@@ -230,37 +240,43 @@ ntfSubscriber NtfSubscriber {smpSubscribers, newSubQ, smpAgent = ca@SMPClientAge
|
||||
atomically (readTBQueue agentQ) >>= \case
|
||||
CAConnected _ -> pure ()
|
||||
CADisconnected srv subs -> do
|
||||
logInfo $ "SMP server disconnected " <> showServer' srv <> " (" <> tshow (length subs) <> ") subscriptions"
|
||||
logSubStatus srv "disconnected" $ length subs
|
||||
forM_ subs $ \(_, ntfId) -> do
|
||||
let smpQueue = SMPQueueNtf srv ntfId
|
||||
updateSubStatus smpQueue NSInactive
|
||||
CAReconnected srv ->
|
||||
logInfo $ "SMP server reconnected " <> showServer' srv
|
||||
CAResubscribed srv sub -> do
|
||||
let ntfId = snd sub
|
||||
smpQueue = SMPQueueNtf srv ntfId
|
||||
updateSubStatus smpQueue NSActive
|
||||
CASubError srv (_, ntfId) err -> do
|
||||
logError $ "SMP subscription error on server " <> showServer' srv <> ": " <> tshow err
|
||||
handleSubError (SMPQueueNtf srv ntfId) err
|
||||
where
|
||||
showServer' = decodeLatin1 . strEncode . host
|
||||
CAResubscribed srv subs -> do
|
||||
forM_ subs $ \(_, ntfId) -> updateSubStatus (SMPQueueNtf srv ntfId) NSActive
|
||||
logSubStatus srv "resubscribed" $ length subs
|
||||
CASubError srv errs ->
|
||||
forM errs (\((_, ntfId), err) -> handleSubError (SMPQueueNtf srv ntfId) err)
|
||||
>>= logSubErrors srv . catMaybes . L.toList
|
||||
|
||||
handleSubError :: SMPQueueNtf -> SMPClientError -> M ()
|
||||
logSubStatus srv event n = when (n > 0) $
|
||||
logInfo $ "SMP server " <> event <> " " <> showServer' srv <> " (" <> tshow n <> " subscriptions)"
|
||||
|
||||
logSubErrors :: SMPServer -> [NtfSubStatus] -> M ()
|
||||
logSubErrors srv errs = forM_ (L.group $ sort errs) $ \errs' -> do
|
||||
logError $ "SMP subscription errors on server " <> showServer' srv <> ": " <> tshow (L.head errs') <> " (" <> tshow (length errs') <> " errors)"
|
||||
|
||||
showServer' = decodeLatin1 . strEncode . host
|
||||
|
||||
handleSubError :: SMPQueueNtf -> SMPClientError -> M (Maybe NtfSubStatus)
|
||||
handleSubError smpQueue = \case
|
||||
PCEProtocolError AUTH -> updateSubStatus smpQueue NSAuth
|
||||
PCEProtocolError AUTH -> updateSubStatus smpQueue NSAuth $> Just NSAuth
|
||||
PCEProtocolError e -> updateErr "SMP error " e
|
||||
PCEIOError e -> updateErr "IOError " e
|
||||
PCEResponseError e -> updateErr "ResponseError " e
|
||||
PCEUnexpectedResponse r -> updateErr "UnexpectedResponse " r
|
||||
PCETransportError e -> updateErr "TransportError " e
|
||||
PCECryptoError e -> updateErr "CryptoError " e
|
||||
PCEIncompatibleHost -> updateSubStatus smpQueue $ NSErr "IncompatibleHost"
|
||||
PCEResponseTimeout -> pure ()
|
||||
PCENetworkError -> pure ()
|
||||
PCEIncompatibleHost -> let e = NSErr "IncompatibleHost" in updateSubStatus smpQueue e $> Just e
|
||||
PCEResponseTimeout -> pure Nothing
|
||||
PCENetworkError -> pure Nothing
|
||||
PCEIOError _ -> pure Nothing
|
||||
where
|
||||
updateErr :: Show e => ByteString -> e -> M ()
|
||||
updateErr errType e = updateSubStatus smpQueue . NSErr $ errType <> bshow e
|
||||
updateErr :: Show e => ByteString -> e -> M (Maybe NtfSubStatus)
|
||||
updateErr errType e = updateSubStatus smpQueue (NSErr $ errType <> bshow e) $> Just (NSErr errType)
|
||||
|
||||
updateSubStatus smpQueue status = do
|
||||
st <- asks store
|
||||
@@ -354,7 +370,7 @@ receive th NtfServerClient {rcvQ, sndQ, activeAt} = forever $ do
|
||||
send :: Transport c => THandle c -> NtfServerClient -> IO ()
|
||||
send h@THandle {thVersion = v} NtfServerClient {sndQ, sessionId, activeAt} = forever $ do
|
||||
t <- atomically $ readTBQueue sndQ
|
||||
void . liftIO $ tPut h [(Nothing, encodeTransmission v sessionId t)]
|
||||
void . liftIO $ tPut h Nothing [(Nothing, encodeTransmission v sessionId t)]
|
||||
atomically . writeTVar activeAt =<< liftIO getSystemTime
|
||||
|
||||
-- instance Show a => Show (TVar a) where
|
||||
|
||||
@@ -48,7 +48,6 @@ data NtfServerConfig = NtfServerConfig
|
||||
apnsConfig :: APNSPushClientConfig,
|
||||
inactiveClientExpiration :: Maybe ExpirationConfig,
|
||||
storeLogFile :: Maybe FilePath,
|
||||
resubscribeDelay :: Int, -- microseconds
|
||||
-- CA certificate private key is not needed for initialization
|
||||
caCertificateFile :: FilePath,
|
||||
privateKeyFile :: FilePath,
|
||||
@@ -94,7 +93,7 @@ newNtfServerEnv config@NtfServerConfig {subQSize, pushQSize, smpAgentCfg, apnsCo
|
||||
|
||||
data NtfSubscriber = NtfSubscriber
|
||||
{ smpSubscribers :: TMap SMPServer SMPSubscriber,
|
||||
newSubQ :: TBQueue (NonEmpty (NtfEntityRec 'Subscription)),
|
||||
newSubQ :: TBQueue [NtfEntityRec 'Subscription],
|
||||
smpAgent :: SMPClientAgent
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@ import Data.Maybe (fromMaybe)
|
||||
import qualified Data.Text as T
|
||||
import Network.Socket (HostName)
|
||||
import Options.Applicative
|
||||
import Simplex.Messaging.Client.Agent (defaultSMPClientAgentConfig)
|
||||
import Simplex.Messaging.Client (ProtocolClientConfig (..))
|
||||
import Simplex.Messaging.Client.Agent (SMPClientAgentConfig (..), defaultSMPClientAgentConfig)
|
||||
import qualified Simplex.Messaging.Crypto as C
|
||||
import Simplex.Messaging.Notifications.Server (runNtfServer)
|
||||
import Simplex.Messaging.Notifications.Server.Env (NtfServerConfig (..))
|
||||
@@ -28,7 +29,10 @@ import System.IO (BufferMode (..), hSetBuffering, stderr, stdout)
|
||||
import Text.Read (readMaybe)
|
||||
|
||||
ntfServerVersion :: String
|
||||
ntfServerVersion = "1.4.1"
|
||||
ntfServerVersion = "1.4.2"
|
||||
|
||||
defaultSMPBatchDelay :: Int
|
||||
defaultSMPBatchDelay = 10000
|
||||
|
||||
ntfServerCLI :: FilePath -> FilePath -> IO ()
|
||||
ntfServerCLI cfgPath logPath =
|
||||
@@ -80,7 +84,9 @@ ntfServerCLI cfgPath logPath =
|
||||
<> ("host: " <> host <> "\n")
|
||||
<> ("port: " <> defaultServerPort <> "\n")
|
||||
<> "log_tls_errors: off\n\
|
||||
\websockets: off\n"
|
||||
\# delay between command batches sent to SMP relays (microseconds), 0 to disable\n"
|
||||
<> ("smp_batch_delay: " <> show defaultSMPBatchDelay <> "\n")
|
||||
<> "websockets: off\n"
|
||||
runServer ini = do
|
||||
hSetBuffering stdout LineBuffering
|
||||
hSetBuffering stderr LineBuffering
|
||||
@@ -96,19 +102,20 @@ ntfServerCLI cfgPath logPath =
|
||||
enableStoreLog = settingIsOn "STORE_LOG" "enable" ini
|
||||
logStats = settingIsOn "STORE_LOG" "log_stats" ini
|
||||
c = combine cfgPath . ($ defaultX509Config)
|
||||
smpBatchDelay = readIniDefault defaultSMPBatchDelay "TRANSPORT" "smp_batch_delay" ini
|
||||
batchDelay = if smpBatchDelay <= 0 then Nothing else Just smpBatchDelay
|
||||
serverConfig =
|
||||
NtfServerConfig
|
||||
{ transports = iniTransports ini,
|
||||
subIdBytes = 24,
|
||||
regCodeBytes = 32,
|
||||
clientQSize = 16,
|
||||
subQSize = 64,
|
||||
pushQSize = 128,
|
||||
smpAgentCfg = defaultSMPClientAgentConfig,
|
||||
clientQSize = 64,
|
||||
subQSize = 512,
|
||||
pushQSize = 1048,
|
||||
smpAgentCfg = defaultSMPClientAgentConfig {smpCfg = (smpCfg defaultSMPClientAgentConfig) {batchDelay}},
|
||||
apnsConfig = defaultAPNSPushClientConfig,
|
||||
inactiveClientExpiration = Nothing,
|
||||
storeLogFile = enableStoreLog $> storeLogFilePath,
|
||||
resubscribeDelay = 50000, -- 50ms
|
||||
caCertificateFile = c caCrtFile,
|
||||
privateKeyFile = c serverKeyFile,
|
||||
certificateFile = c serverCrtFile,
|
||||
|
||||
Reference in New Issue
Block a user