From b801d77c74f0b688000e0bc2194e835bd1d1965e Mon Sep 17 00:00:00 2001 From: sh <37271604+shumvgolove@users.noreply.github.com> Date: Wed, 20 May 2026 08:21:43 +0000 Subject: [PATCH] cli: fix redraw slowness (#6735) * cli: add pland to fix redraw slowness * updtae doc * cli: decouple key reading from processing via TQueue --- plans/cli-paste-slowness.md | 111 +++++++++++++++++++++++++++++ src/Simplex/Chat/Terminal.hs | 10 ++- src/Simplex/Chat/Terminal/Input.hs | 14 ++-- 3 files changed, 127 insertions(+), 8 deletions(-) create mode 100644 plans/cli-paste-slowness.md diff --git a/plans/cli-paste-slowness.md b/plans/cli-paste-slowness.md new file mode 100644 index 0000000000..33255996be --- /dev/null +++ b/plans/cli-paste-slowness.md @@ -0,0 +1,111 @@ +# CLI terminal: event loss root cause analysis + +## Two distinct problems + +### Problem 1: Paste — TMVar capacity-1 bottleneck + +When copy-pasting text, the capacity-1 `TMVar` event channel between the keyboard input reader and the consumer loop throttles stdin reading to terminal redraw speed. + +**Root cause:** `events <- liftIO newEmptyTMVarIO` (`Platform.hsc:64`). Producer blocks on `putTMVar` after each event until consumer finishes redrawing. Consumer does a full terminal redraw per event (`Input.hs:161`). + +**Fix:** Replace `TMVar` with `TQueue` in `Platform.hsc` (6 line changes on POSIX, matching changes on Windows). Decouples producer from consumer — stdin is drained at full speed regardless of redraw speed. + +See previous analysis in git history for full details on this issue. + +--- + +### Problem 2: Heavy load — `outputQ` backpressure blocks `agentSubscriber` + +When the CLI is used as a heavy client (e.g., 1M connections), incoming chat events overwhelm the terminal display, causing cascading backpressure that blocks message acknowledgments and stalls the entire event processing pipeline. + +**This is the more severe problem.** It causes actual message loss at the protocol level, not just UI slowness. + +## Root cause: bounded `outputQ` + single-threaded `agentSubscriber` + +### The queue chain + +``` +Network (SMP/XFTP connections) + → agent internal queues + → subQ (TBQueue, capacity 1024) ← agent → chat boundary + → agentSubscriber (single-threaded) ← Commands.hs:4167 + → processAgentMessage ← Subscriber.hs:109 + → toView_ → writeTBQueue outputQ ← Controller.hs:1528, BLOCKS when full + → outputQ (TBQueue, capacity 1024) ← Chat.hs:152 + → runTerminalOutput ← Output.hs:146 + → printToTerminal (acquires termLock) ← Output.hs:298-303 + → terminal I/O (slow) +``` + +All queues are bounded `TBQueue` with default capacity 1024 (`Options.hs:226`). All writes use `writeTBQueue` which **blocks when full** — no events are dropped within the application, but backpressure cascades upstream. + +### The blocking chain under heavy load + +1. **Terminal I/O is the bottleneck.** `runTerminalOutput` (`Output.hs:146`) reads one event at a time from `outputQ`, acquires `termLock`, prints the message + redraws input, releases lock. Each iteration involves ANSI escape sequences, cursor manipulation, and `flush` syscalls. Throughput: ~hundreds of events/sec at best. + +2. **`outputQ` fills up.** With 1M connections generating events, the arrival rate far exceeds terminal display speed. The 1024-element TBQueue fills in seconds. + +3. **`toView_` blocks.** `Controller.hs:1528`: `writeTBQueue localQ (Nothing, event)` blocks when the queue is full. This call happens inside `processAgentMessage` → `processAgentMessageConn`, which runs within the `agentSubscriber` loop. + +4. **`agentSubscriber` blocks — head-of-line blocking.** `Commands.hs:4164-4167`: + ```haskell + agentSubscriber = do + q <- asks $ subQ . smpAgent + forever (atomically (readTBQueue q) >>= process) + ``` + Single-threaded. When `process` blocks on `toView_`, ALL events for ALL connections queue up behind it. Events for 1M other connections — including time-critical ACKs, keepalives, and handshakes — are stuck. + +5. **ACKs are never sent.** The message receive path (`Subscriber.hs:1537-1540`) calls `toView` BEFORE `ackMsg`: + ```haskell + -- Inside withAckMessage's action: + saveRcvChatItem' ... -- save to DB (succeeds) + toView $ CEvtNewChatItems ... -- BLOCKS here (outputQ full) + -- returns (withRcpt, shouldDelConns) + + -- After action returns (Subscriber.hs:1396-1397): + ackMsg msgMeta ... -- NEVER REACHED while toView blocks + ``` + The developers explicitly acknowledge this at `Subscriber.hs:122-123`: + > *without ACK the message delivery will be stuck* + +6. **`subQ` fills up.** The agent can't deliver events to `subQ` (also capacity 1024) because `agentSubscriber` isn't reading. Agent-level processing stalls. + +7. **Network-level failure.** Connections time out due to unprocessed keepalives and unacknowledged messages. Messages are lost at the protocol level. + +### `termLock` contention worsens the bottleneck + +`termLock` (`Output.hs:55`) is a `TMVar ()` mutex shared between: +- **Output thread** (`runTerminalOutput` → `printToTerminal`): acquires lock for each displayed message +- **Input thread** (`receiveFromTTY` → `updateInput`): acquires lock after each keystroke +- **Live prompt thread** (`blinkLivePrompt` → `updateInputView`): acquires lock every 1 second + +Under heavy load, the output thread dominates the lock (constant stream of messages). The input thread is starved — user keystrokes are delayed. This also slows the output thread itself (lock contention overhead). + +Note: `withTermLock` (`Output.hs:138-142`) is not exception-safe — no `bracket`/`finally`. If the action throws, the lock leaks and all threads deadlock. + +### Error reporting also blocks + +When `processAgentMessage` encounters an error, the error handler (`Commands.hs:4179`) calls `eToView'` → `toView_` → `writeTBQueue outputQ`. If `outputQ` is already full, even error reporting blocks. There is no escape path. + +## Impact summary + +| Load level | `outputQ` state | Effect | +|---|---|---| +| Light (few connections) | Nearly empty | No issues | +| Moderate (hundreds) | Partially filled | Occasional display lag | +| Heavy (thousands+) | Full (1024) | `toView_` blocks → `agentSubscriber` blocks → head-of-line blocking for ALL connections → ACKs delayed → message delivery stuck | +| Extreme (1M connections) | Permanently full | Cascading failure: all event processing stops, connections time out, messages lost at protocol level | + +## Fix + +The core fix: **`toView_` must never block the event processing pipeline on terminal display.** + +Options (in order of simplicity): + +1. **Make `outputQ` unbounded** — replace `TBQueue` with `TQueue` in `Chat.hs:152`. `writeTQueue` never blocks. Events accumulate in memory under heavy load but the event processing pipeline (including ACKs) is never stalled. Tradeoff: unbounded memory growth under sustained heavy load. + +2. **Non-blocking write with drop** — use `tryWriteTBQueue` in `toView_`. When `outputQ` is full, drop the display event (or a coalesced summary). ACKs and network processing proceed unblocked. Tradeoff: some events not displayed, but none lost at protocol level. + +3. **Separate ACK from display** — restructure `withAckMessage` to send ACK immediately after DB save, before `toView`. This decouples protocol correctness from display. `toView` can still block, but ACKs are always timely. Tradeoff: requires careful restructuring of the message processing path. + +4. **Increase queue capacity** — increase `tbqSize` from 1024 to a larger value. Delays the problem but doesn't fix it. Under sustained heavy load, any finite queue eventually fills. diff --git a/src/Simplex/Chat/Terminal.hs b/src/Simplex/Chat/Terminal.hs index 21781229e4..29299cfeae 100644 --- a/src/Simplex/Chat/Terminal.hs +++ b/src/Simplex/Chat/Terminal.hs @@ -8,6 +8,7 @@ module Simplex.Chat.Terminal where import Control.Monad +import Control.Monad.IO.Class (liftIO) import qualified Data.List.NonEmpty as L import Simplex.Chat (defaultChatConfig) import Simplex.Chat.Controller @@ -22,6 +23,8 @@ import Simplex.Chat.Terminal.Output import Simplex.FileTransfer.Client.Presets (defaultXFTPServers) import Simplex.Messaging.Client (NetworkConfig (..), SMPProxyFallback (..), SMPProxyMode (..), defaultNetworkConfig) import Simplex.Messaging.Util (raceAny_) +import System.Terminal (Key, Modifiers) +import UnliftIO.STM #if !defined(dbPostgres) import Control.Exception (handle, throwIO) import qualified Data.ByteArray as BA @@ -99,4 +102,9 @@ simplexChatTerminal cfg options t = run options #endif runChatTerminal :: ChatTerminal -> ChatController -> ChatOpts -> IO () -runChatTerminal ct cc opts = raceAny_ [runTerminalInput ct cc, runTerminalOutput ct cc opts, runInputLoop ct cc] +runChatTerminal ct cc opts = do + keyQ <- newTQueueIO + raceAny_ [runKeyReader ct keyQ, runTerminalInput ct cc keyQ, runTerminalOutput ct cc opts, runInputLoop ct cc] + +runKeyReader :: ChatTerminal -> TQueue (Key, Modifiers) -> IO () +runKeyReader ct q = withChatTerm ct $ forever $ getKey >>= liftIO . atomically . writeTQueue q diff --git a/src/Simplex/Chat/Terminal/Input.hs b/src/Simplex/Chat/Terminal/Input.hs index e0ee10aff9..effcb7a71c 100644 --- a/src/Simplex/Chat/Terminal/Input.hs +++ b/src/Simplex/Chat/Terminal/Input.hs @@ -152,14 +152,14 @@ sendUpdatedLiveMessage cc sentMsg LiveMessage {chatName, chatItemId} live = do let cmd = UpdateLiveMessage chatName chatItemId live $ T.pack sentMsg execChatCommand' cmd 0 `runReaderT` cc -runTerminalInput :: ChatTerminal -> ChatController -> IO () -runTerminalInput ct cc = withChatTerm ct $ do - updateInput ct - receiveFromTTY cc ct +runTerminalInput :: ChatTerminal -> ChatController -> TQueue (Key, Modifiers) -> IO () +runTerminalInput ct cc keyQ = do + updateInputView ct + receiveFromTTY keyQ cc ct -receiveFromTTY :: forall m. MonadTerminal m => ChatController -> ChatTerminal -> m () -receiveFromTTY cc@ChatController {inputQ, currentUser, currentRemoteHost, chatStore} ct@ChatTerminal {termSize, termState, liveMessageState, activeTo} = - forever $ getKey >>= liftIO . processKey >> withTermLock ct (updateInput ct) +receiveFromTTY :: TQueue (Key, Modifiers) -> ChatController -> ChatTerminal -> IO () +receiveFromTTY keyQ cc@ChatController {inputQ, currentUser, currentRemoteHost, chatStore} ct@ChatTerminal {termSize, termState, liveMessageState, activeTo} = + forever $ atomically (readTQueue keyQ) >>= processKey >> updateInputView ct where processKey :: (Key, Modifiers) -> IO () processKey key = case key of