Fix Grok revertStateOnFail race condition

This commit is contained in:
Narasimha-sc
2026-04-18 13:23:58 +00:00
parent fde776dcf2
commit c0bdf53ef9
3 changed files with 40 additions and 4 deletions
+33
View File
@@ -1717,6 +1717,39 @@ describe("State persistence in customData", () => {
expect(chat.customData.get(CUSTOMER_GROUP_ID)?.state).toBe("QUEUE")
})
test("concurrent /team during Grok activation timeout does not demote state", async () => {
await reachQueue()
addBotMessage("The team will reply to your message")
// Pause activateGrok at waitForGrokJoin so /team can run in the meantime.
// Patching apiAddMember won't work: it's wrapped in withMainProfile's mutex,
// which /team's activateTeam also needs. waitForGrokJoin awaits outside the
// mutex — that's the real race window in production.
let releaseJoin!: (joined: boolean) => void
;(bot as any).waitForGrokJoin = () =>
new Promise<boolean>((resolve) => { releaseJoin = resolve })
// /grok: writes state=GROK optimistically, fire-and-forgets activateGrok.
await bot.onNewChatItems(customerMessage("/grok"))
expect(chat.customData.get(CUSTOMER_GROUP_ID)?.state).toBe("GROK")
// Let activateGrok progress past apiAddMember into waitForGrokJoin.
await Promise.resolve()
await Promise.resolve()
// /team while activateGrok is waiting for join — writes TEAM-PENDING + adds members.
await bot.onNewChatItems(customerMessage("/team"))
expect(chat.customData.get(CUSTOMER_GROUP_ID)?.state).toBe("TEAM-PENDING")
expectMemberAdded(CUSTOMER_GROUP_ID, TEAM_MEMBER_1_ID)
// Simulate Grok join timeout — activateGrok's revertStateOnFail runs.
releaseJoin(false)
await bot.flush()
// Fix asserts: revert guard sees state != "GROK" and leaves TEAM-PENDING alone.
expect(chat.customData.get(CUSTOMER_GROUP_ID)?.state).toBe("TEAM-PENDING")
})
test("first team text writes state=TEAM via gate", async () => {
await reachTeamPending()
addBotMessage("We will reply within 24 hours.")
@@ -127,6 +127,8 @@ Per-conversation state is stored in the group's `customData` and written at the
**State is authoritative and monotonic.** Once written, `customData.state` persists across member leave/join events. The only path that clears it is the existing `onLeftMember` handler when the customer themselves leaves — at that point the entire customData is cleared.
**Failure-path revert is CAS-guarded.** `activateGrok` runs fire-and-forget, so its `setStateOnFail` revert (`QUEUE`) can race with a concurrent transition (e.g. `/team` writing `TEAM-PENDING` while `waitForGrokJoin` is pending). To preserve monotonicity, `revertStateOnFail` is a compare-and-set: it only writes `setStateOnFail` if `customData.state === "GROK"` (the optimistic value both call sites write before invoking `activateGrok`). If another handler has since stamped a different state, the revert is skipped — the in-flight transition wins and stays.
TEAM-PENDING takes priority over GROK when both Grok and team are present (after `/team` but before team member's first message). `/grok` remains available in TEAM-PENDING — if Grok is not yet in the group, it gets invited; if already present, the command is ignored.
**State derivation helpers:**
@@ -519,7 +521,7 @@ Type signatures affected:
3. Drain `bufferedGrokInvitations` — if the `receivedGroupInvitation` event arrived during step 1's await (race condition), process it now.
4. Set `grokInitialResponsePending.add(groupId)` — suppresses per-message responses from `onGrokNewChatItems` for this group until the initial combined response completes. Without this gate, the message backlog arriving via `newChatItems` would trigger individual per-message responses racing with the initial combined response — producing duplicate replies (e.g., 3 replies for 2 messages).
5. `waitForGrokJoin(120s)` — awaits resolver from Grok profile's `connectedToGroupMember` (step 8 below)
6. Timeout → notify customer (`grokUnavailableMessage`), send queue message if was WELCOME→GROK, fall back to QUEUE, clear `grokInitialResponsePending`
6. Timeout → notify customer (`grokUnavailableMessage`), send queue message if was WELCOME→GROK, fall back to QUEUE (CAS-guarded: only if `customData.state` is still `GROK` — a concurrent `/team` that switched to `TEAM-PENDING` is respected), clear `grokInitialResponsePending`
**Grok profile side (independent, triggered by its own events):**
7. `receivedGroupInvitation` → look up `pendingGrokJoins` by `evt.groupInfo.membership.memberId`. If found, auto-accept via `apiJoinGroup(groupId)`, set up `grokGroupMap` and `reverseGrokMap`. If not found (race: event arrived before step 2), buffer in `bufferedGrokInvitations` for step 3. Grok is NOT yet connected — cannot read history or send messages.
+4 -3
View File
@@ -563,9 +563,10 @@ export class SupportBot {
if (!this.grokApi) return
const grokApi = this.grokApi
const revertStateOnFail = async () => {
if (opts.setStateOnFail) {
await this.cards.mergeCustomData(groupId, {state: opts.setStateOnFail})
}
if (!opts.setStateOnFail) return
const current = await this.cards.getRawCustomData(groupId)
if (current?.state !== "GROK") return
await this.cards.mergeCustomData(groupId, {state: opts.setStateOnFail})
}
if (this.config.grokContactId === null) {
await revertStateOnFail()