Better bad state recovery from long executions

This commit is contained in:
Oracle 2025-01-21 17:45:07 +01:00
parent 0601ede9d2
commit 5503d22a6d

View File

@ -127,9 +127,23 @@ class AsyncronWorker:
status_reason = "Worker died during execution", status_reason = "Worker died during execution",
status = "A", worker_lock = None status = "A", worker_lock = None
) )
#DONT print anything in here!
#if count: print(f"Had to cancel {count} task(s).") #cls.log.warning #if count: print(f"Had to cancel {count} task(s).") #cls.log.warning
self.model.delete()
# Almost always, a worker can delete its own model from the db.
# But there seems to be a situation where, despite Task.worker_lock being on_delete=SET_NULL,
# a race condition can still cause an IntegrityError. If I'm correct about the root cause,
# simply retrying the self-delete after a short interval should fix the issue.
# Error example:
# - django.db.utils.IntegrityError: update or delete on table "asyncron_worker" violates foreign key constraint "asyncron_task_worker_lock_id_0bb55026_fk_asyncron_worker_id" on table "asyncron_task"
for attempt in range(3):
try:
self.model.delete()
except IntegrityError:
time.sleep( 0.1 )
else: break
#self.loop.call_soon(self.started.set) #self.loop.call_soon(self.started.set)
def attach_django_signals( self ): def attach_django_signals( self ):
@ -194,9 +208,8 @@ class AsyncronWorker:
loop_wait = 0 loop_wait = 0
else: else:
await Worker.objects.filter( await Worker.objects.filter( is_master = True ).filter(
is_master = True, models.Q(last_crowning_attempt = None) | models.Q(last_crowning_attempt__lte = timezone.now() - timezone.timedelta( minutes = 5 ))
last_crowning_attempt__lte = timezone.now() - timezone.timedelta( minutes = 5 )
).aupdate( is_master = False ) ).aupdate( is_master = False )