bugfix: ensure we release the lock (#3628)

The `case nats.ErrConsumerDeleted, nats.ErrConsumerNotFound:` bit was
merge conflicted in https://github.com/element-hq/dendrite/pull/3588 so
it broke the locking order.
This commit is contained in:
Kegan Dougal 2025-08-12 20:08:30 +01:00 committed by GitHub
parent f24688af11
commit d4c5804ad2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -271,7 +271,7 @@ func (w *worker) _next() {
})
msgs, err := w.subscription.Fetch(1, nats.Context(ctx))
switch err {
case nil, nats.ErrTimeout, context.DeadlineExceeded, context.Canceled:
case nil:
// Is the server shutting down? If so, stop processing.
if w.r.ProcessContext.Context().Err() != nil {
return
@ -279,26 +279,27 @@ func (w *worker) _next() {
// Make sure that once we're done here, we queue up another call
// to _next in the inbox.
defer w.Act(nil, w._next)
// If no error was reported, but we didn't get exactly one message,
// then skip over this and try again on the next iteration.
if len(msgs) != 1 {
case nats.ErrTimeout, context.DeadlineExceeded, context.Canceled:
// Is the server shutting down? If so, stop processing.
if w.r.ProcessContext.Context().Err() != nil {
return
}
case context.DeadlineExceeded, context.Canceled:
// The context exceeded, so we've been waiting for more than a
// minute for activity in this room. At this point we will shut
// down the subscriber to free up resources. It'll get started
// again if new activity happens.
w.Lock()
defer w.Unlock()
// inside the lock, let's check if the ephemeral consumer saw something new!
// If so, we do have new messages after all, they just came at a bad time.
if w.ephemeralSeq > w.durableSeq {
w.Act(nil, w._next)
w.Unlock()
return
}
w.Unlock()
case nats.ErrConsumerDeleted, nats.ErrConsumerNotFound:
w.Lock()
defer w.Unlock()
// The consumer is gone, therefore it's reached the inactivity
// threshold. Clean up and stop processing at this point, if a
// new event comes in for this room then the ordered consumer
@ -308,7 +309,6 @@ func (w *worker) _next() {
}
w.subscription = nil
return
default:
// Something went wrong while trying to fetch the next event
// from the queue. In which case, we'll shut down the subscriber
@ -329,6 +329,12 @@ func (w *worker) _next() {
// Since we either Ack() or Term() the message at this point, we can defer decrementing the room backpressure
defer roomserverInputBackpressure.With(prometheus.Labels{"room_id": w.roomID}).Dec()
// If no error was reported, but we didn't get exactly one message,
// then skip over this and try again on the next iteration.
if len(msgs) != 1 {
return
}
// Try to unmarshal the input room event. If the JSON unmarshalling
// fails then we'll terminate the message — this notifies NATS that
// we are done with the message and never want to see it again.