fix(app): guard against possible race conditions during enqueue

psychedelicious · psychedelicious · commit 7cced2e93a54 · 2025-06-11T15:26:20.000+10:00
In #7724 we made a number of perf optimisations related to enqueuing. One of these optimisations included moving the enqueue logic - including expensive prep work and db writes - to a separate thread. At the same time, manual DB locking was abandoned in favor of WAL mode. Finally, we set `check_same_thread=False` to allow multiple threads to access the connection at a given time.

I think this may be the cause of #7950:
- We start an enqueue in a thread (running in bg)
- We dequeue
- Dequeue pulls a partially-written queue item from DB and we get the errors in the linked issue

To be honest, I don't understand enough about SQLite to confidently say that this kind of race condition is actually possible. But:
- The error started popping up around the time we made this change.
- I have reviewed the logic from enqueue to dequeue very carefully _many_ times over the past month or so, and I am confident that the error is only possible if we are getting unexpectedly `NULL` values from the DB.
- The DB schema includes `NOT NULL` constraints for the column that is apparently returning `NULL`.
- Therefore, without some kind of race condition or schema issue, the error should not be possible.
- The `enqueue_batch` call is the only place I can find where we have the possibility of a race condition due to async logic. Everywhere else, all DB interaction for the queue is synchronous, as far as I can tell.

This change retains the perf benefits by running the heavy enqueue prep logic in a separate thread, but moves back to the main thread for the DB write. It also uses an explicit transaction for the write.

Will just have to wait and see if this fixes the issue.
diff --git a/invokeai/app/services/session_queue/session_queue_sqlite.py b/invokeai/app/services/session_queue/session_queue_sqlite.py
@@ -104,11 +104,7 @@ def _get_highest_priority(self, queue_id: str) -> int:
         return cast(Union[int, None], cursor.fetchone()[0]) or 0
 
     async def enqueue_batch(self, queue_id: str, batch: Batch, prepend: bool) -> EnqueueBatchResult:
-        return await asyncio.to_thread(self._enqueue_batch, queue_id, batch, prepend)
-
-    def _enqueue_batch(self, queue_id: str, batch: Batch, prepend: bool) -> EnqueueBatchResult:
         try:
-            cursor = self._conn.cursor()
             # TODO: how does this work in a multi-user scenario?
             current_queue_size = self._get_current_queue_size(queue_id)
             max_queue_size = self.__invoker.services.configuration.max_queue_size
@@ -118,28 +114,29 @@ def _enqueue_batch(self, queue_id: str, batch: Batch, prepend: bool) -> EnqueueB
             if prepend:
                 priority = self._get_highest_priority(queue_id) + 1
 
-            requested_count = calc_session_count(batch)
-            values_to_insert = prepare_values_to_insert(
+            requested_count = await asyncio.to_thread(
+                calc_session_count,
+                batch=batch,
+            )
+            values_to_insert = await asyncio.to_thread(
+                prepare_values_to_insert,
                 queue_id=queue_id,
                 batch=batch,
                 priority=priority,
                 max_new_queue_items=max_new_queue_items,
             )
             enqueued_count = len(values_to_insert)
 
-            if requested_count > enqueued_count:
-                values_to_insert = values_to_insert[:max_new_queue_items]
-
-            cursor.executemany(
-                """--sql
-                INSERT INTO session_queue (queue_id, session, session_id, batch_id, field_values, priority, workflow, origin, destination, retried_from_item_id)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-                """,
-                values_to_insert,
-            )
-            self._conn.commit()
+            with self._conn:
+                cursor = self._conn.cursor()
+                cursor.executemany(
+                    """--sql
+                    INSERT INTO session_queue (queue_id, session, session_id, batch_id, field_values, priority, workflow, origin, destination, retried_from_item_id)
+                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+                    """,
+                    values_to_insert,
+                )
         except Exception:
-            self._conn.rollback()
             raise
         enqueue_result = EnqueueBatchResult(
             queue_id=queue_id,