feat: add engine alive indicator, debug mode, and orchestrator retry logic

This commit is contained in:
Loic Coenen
2026-05-20 20:59:58 +00:00
committed by Loic Coenen (aider)
parent e79c2ac116
commit f2993eac80
5 changed files with 139 additions and 28 deletions

View File

@@ -1,3 +1,10 @@
/*
* orchestrator.c - Launches both the engine and client processes,
* forwards signals, and waits for either to exit before cleaning up
* the other. If the first child to exit does so abnormally, both are
* relaunched; at most 3 launch attempts are made in total.
*/
#define _GNU_SOURCE
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -8,27 +15,44 @@
static pid_t engine_pid = 0;
static pid_t client_pid = 0;
static void cleanup(int sig) {
(void)sig;
/*
 * Ask both child processes to stop.
 *
 * Sends SIGTERM to the engine and to the client, but only for PIDs that
 * are strictly positive, so an unset (0) or failed-fork (-1) value is
 * never passed to kill().
 */
static void terminate_children(void) {
if (engine_pid > 0) kill(engine_pid, SIGTERM);
if (client_pid > 0) kill(client_pid, SIGTERM);
/* Reap children until wait() reports there are none left (or fails). */
/* NOTE(review): this loop overlaps with wait_children(), which callers
 * invoke right after this function -- confirm it is meant to stay here. */
while (wait(NULL) > 0);
}
/*
 * Reap child processes one at a time.
 *
 * Blocks in waitpid() for any child and keeps going for as long as a
 * child PID is returned; stops on the first non-positive return value
 * (no children left, or an error). Exit statuses are discarded.
 */
static void wait_children(void) {
    int wstatus;

    for (;;) {
        pid_t reaped = waitpid(-1, &wstatus, 0);
        if (reaped <= 0) {
            break;
        }
    }
}
/*
 * Signal handler: shut the children down and leave.
 *
 * Asks the children to terminate, reaps them, then ends this process
 * immediately with status 0 via _exit(). The signal number itself is
 * not used.
 */
static void cleanup(int signum) {
    terminate_children();
    wait_children();
    (void)signum; /* handler signature requires it; value is irrelevant */
    _exit(0);
}
int main(int argc, char *argv[]) {
signal(SIGINT, cleanup);
signal(SIGTERM, cleanup);
engine_pid = fork();
if (engine_pid == 0) {
/*
 * Fork and exec the engine binary.
 *
 * Returns the child's PID in the parent, or -1 if fork() failed (the
 * error is reported with perror()). In the child, ./engine/looper is
 * exec'd; if the exec fails the child reports it and exits with status 1,
 * so the parent observes a non-zero exit status.
 */
static pid_t start_engine(void) {
    pid_t pid = fork();
    if (pid == -1) {
        perror("fork engine");
        return -1;
    }
    if (pid == 0) {
        /* The argument list of a variadic exec must end with a null
         * pointer of type char *; a bare NULL is not guaranteed to be
         * passed as a pointer on every platform. */
        execl("./engine/looper", "looper", (char *)NULL);
        /* Only reached when execl() failed. */
        perror("execl engine");
        _exit(1);
    }
    return pid;
}
client_pid = fork();
if (client_pid == 0) {
static pid_t start_client(int argc, char *argv[]) {
pid_t pid = fork();
if (pid == -1) {
perror("fork client");
return -1;
}
if (pid == 0) {
if (argc > 2 && strcmp(argv[1], "-s") == 0) {
execl("./client/looper-client", "looper-client", "-s", argv[2], NULL);
} else {
@@ -37,15 +61,85 @@ int main(int argc, char *argv[]) {
perror("execl client");
_exit(1);
}
int status;
pid_t exited = wait(&status);
if (exited == engine_pid) {
kill(client_pid, SIGTERM);
wait(NULL);
} else if (exited == client_pid) {
kill(engine_pid, SIGTERM);
wait(NULL);
}
return 0;
return pid;
}
/*
 * Orchestrator entry point.
 *
 * Installs the SIGINT/SIGTERM handler, exports LOOPER_DEBUG=1 when
 * "--debug" appears on the command line, then launches the engine and the
 * client and waits for the first of them to exit. The survivor is sent
 * SIGTERM and reaped.
 *
 * Returns 0 when the first child to exit did so with status 0. Any other
 * outcome (start failure, non-zero exit, death by signal) consumes one of
 * MAX_ATTEMPTS launch attempts; once they are used up, or if waitpid()
 * returns something unexpected, the function returns 1.
 *
 * engine_pid / client_pid are cleared as soon as the corresponding child
 * has been reaped, so that the signal handler never sends SIGTERM to a PID
 * that no longer belongs to one of our children (the kernel may recycle
 * it) while we sleep between attempts.
 */
int main(int argc, char *argv[]) {
    signal(SIGINT, cleanup);
    signal(SIGTERM, cleanup);

    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--debug") == 0) {
            /* Non-fatal: the children simply run without debug output. */
            if (setenv("LOOPER_DEBUG", "1", 1) != 0) {
                perror("setenv LOOPER_DEBUG");
            }
            break;
        }
    }

    int attempt = 0;
    const int MAX_ATTEMPTS = 3;
    while (attempt < MAX_ATTEMPTS) {
        attempt++;

        engine_pid = start_engine();
        if (engine_pid == -1) {
            engine_pid = 0;
            if (attempt >= MAX_ATTEMPTS) {
                fprintf(stderr, "Failed to start engine after %d attempts\n", MAX_ATTEMPTS);
                return 1;
            }
            usleep(500000);
            continue;
        }

        client_pid = start_client(argc, argv);
        if (client_pid == -1) {
            client_pid = 0;
            /* The engine is useless without a client: stop and reap it. */
            kill(engine_pid, SIGTERM);
            waitpid(engine_pid, NULL, 0);
            engine_pid = 0;
            if (attempt >= MAX_ATTEMPTS) {
                fprintf(stderr, "Failed to start client after %d attempts\n", MAX_ATTEMPTS);
                return 1;
            }
            usleep(500000);
            continue;
        }

        /* Both children have started. Wait for either to exit. */
        int status;
        pid_t exited = waitpid(-1, &status, 0);
        pid_t other = 0;
        if (exited == engine_pid) {
            other = client_pid;
            engine_pid = 0; /* already reaped */
        } else if (exited == client_pid) {
            other = engine_pid;
            client_pid = 0; /* already reaped */
        } else {
            /* unexpected waitpid failure */
            terminate_children();
            wait_children();
            return 1;
        }

        /* Kill the other child now that one has exited. */
        if (other > 0) {
            kill(other, SIGTERM);
            waitpid(other, NULL, 0);
        }
        /* Nothing is running any more; make sure the handler knows. */
        engine_pid = 0;
        client_pid = 0;

        /* Normal clean exit (zero status) means we are done. */
        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
            return 0;
        }

        if (attempt >= MAX_ATTEMPTS) {
            fprintf(stderr, "Child exited abnormally after %d attempts. Quitting.\n",
                    MAX_ATTEMPTS);
            return 1;
        }

        fprintf(stderr, "Child exited abnormally, retrying...\n");
        usleep(500000);
        /* loop back to try another fresh start */
    }
    return 1;
}