feat: add cleanup for stale function executions

Adds a new interval task that marks executions stuck in 'processing'
status for more than 30 minutes as 'failed' with a timeout error.
This commit is contained in:
Chirag Aggarwal
2026-01-16 13:59:56 +05:30
parent ce91b1a03d
commit f5a61fb4d6
3 changed files with 54 additions and 1 deletions
+1
View File
@@ -102,6 +102,7 @@ _APP_STATS_RESOURCES_INTERVAL=30
_APP_MAINTENANCE_RETENTION_USAGE_HOURLY=8640000
_APP_MAINTENANCE_RETENTION_SCHEDULES=86400
_APP_INTERVAL_DOMAIN_VERIFICATION=60
_APP_INTERVAL_CLEANUP_STALE_EXECUTIONS=300
_APP_USAGE_STATS=enabled
_APP_LOGGING_CONFIG=
_APP_LOGGING_CONFIG_REALTIME=
+1
View File
@@ -880,6 +880,7 @@ services:
- _APP_DB_PASS
- _APP_DATABASE_SHARED_TABLES
- _APP_INTERVAL_DOMAIN_VERIFICATION
- _APP_INTERVAL_CLEANUP_STALE_EXECUTIONS
appwrite-task-stats-resources:
container_name: appwrite-task-stats-resources
+52 -1
View File
@@ -24,22 +24,30 @@ class Interval extends Action
$this
->desc('Schedules tasks on regular intervals by publishing them to our queues')
->inject('dbForPlatform')
->inject('getProjectDB')
->inject('queueForCertificates')
->callback($this->action(...));
}
public function action(Database $dbForPlatform, Certificate $queueForCertificates): void
public function action(Database $dbForPlatform, callable $getProjectDB, Certificate $queueForCertificates): void
{
Console::title('Interval V1');
Console::success(APP_NAME . ' interval process v1 has started');
$intervalDomainVerification = (int) System::getEnv('_APP_INTERVAL_DOMAIN_VERIFICATION', '60'); // 1 minute
$intervalCleanupStaleExecutions = (int) System::getEnv('_APP_INTERVAL_CLEANUP_STALE_EXECUTIONS', '300'); // 5 minutes
\go(function () use ($dbForPlatform, $queueForCertificates, $intervalDomainVerification) {
Console::loop(function () use ($dbForPlatform, $queueForCertificates) {
$this->verifyDomain($dbForPlatform, $queueForCertificates);
}, $intervalDomainVerification);
});
\go(function () use ($dbForPlatform, $getProjectDB, $intervalCleanupStaleExecutions) {
Console::loop(function () use ($dbForPlatform, $getProjectDB) {
$this->cleanupStaleExecutions($dbForPlatform, $getProjectDB);
}, $intervalCleanupStaleExecutions);
});
}
private function verifyDomain(Database $dbForPlatform, Certificate $queueForCertificates): void
@@ -72,4 +80,47 @@ class Interval extends Action
->trigger();
}
}
private function cleanupStaleExecutions(Database $dbForPlatform, callable $getProjectDB): void
{
$time = DatabaseDateTime::now();
$staleThreshold = DatabaseDateTime::addSeconds(new DateTime(), -1200); // 20 minutes ago
Console::info("[{$time}] Starting cleanup of stale executions");
$dbForPlatform->foreach(
'projects',
function (Document $project) use ($getProjectDB, $time, $staleThreshold) {
try {
$dbForProject = $getProjectDB($project);
$staleExecutions = $dbForProject->find('executions', [
Query::equal('status', ['processing']),
Query::lessThan('$createdAt', $staleThreshold),
Query::limit(100),
]);
if (\count($staleExecutions) === 0) {
return;
}
Console::info("[{$time}] Found " . \count($staleExecutions) . " stale executions in project {$project->getId()}");
foreach ($staleExecutions as $execution) {
$execution->setAttribute('status', 'failed');
$execution->setAttribute('errors', 'Execution timed out');
$dbForProject->updateDocument('executions', $execution->getId(), $execution);
}
} catch (\Throwable $th) {
Console::error("[{$time}] Failed to cleanup stale executions for project {$project->getId()}: " . $th->getMessage());
}
},
[
Query::equal('region', [System::getEnv('_APP_REGION', 'default')]),
Query::limit(100),
]
);
Console::info("[{$time}] Completed cleanup of stale executions");
}
}