feat: add engine alive indicator, debug mode, and orchestrator retry logic
This commit is contained in:
committed by
Loic Coenen (aider)
parent
e79c2ac116
commit
f2993eac80
138
orchestrator.c
138
orchestrator.c
@@ -1,3 +1,10 @@
|
||||
/*
|
||||
* orchestrator.c - Launches both the engine and client processes,
|
||||
* forwards signals, and waits for either to exit before cleaning up
|
||||
* the other. After an abnormal exit both are relaunched (3 attempts in total).
|
||||
*/
|
||||
#define _GNU_SOURCE
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
@@ -8,27 +15,44 @@
|
||||
static pid_t engine_pid = 0;
|
||||
static pid_t client_pid = 0;
|
||||
|
||||
static void cleanup(int sig) {
|
||||
(void)sig;
|
||||
static void terminate_children(void) {
|
||||
if (engine_pid > 0) kill(engine_pid, SIGTERM);
|
||||
if (client_pid > 0) kill(client_pid, SIGTERM);
|
||||
while (wait(NULL) > 0);
|
||||
}
|
||||
|
||||
/*
 * wait_children - block until no child is left to reap.
 *
 * Repeatedly calls waitpid() on "any child" and stops at the first call
 * that does not return a positive PID. Exit statuses are discarded.
 */
static void wait_children(void) {
    int status;

    for (;;) {
        if (waitpid(-1, &status, 0) <= 0) {
            break;
        }
    }
}
|
||||
|
||||
/*
 * cleanup - signal handler installed by main() for SIGINT and SIGTERM.
 *
 * Signals both children, reaps them, then leaves the process with status 0
 * through _exit() so that no atexit handlers or stdio flushing run from
 * signal context.
 *
 * sig: the delivered signal number (unused).
 */
static void cleanup(int sig) {
    terminate_children();
    wait_children();
    (void)sig; /* handler signature requires it; the value is irrelevant */
    _exit(0);
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
signal(SIGINT, cleanup);
|
||||
signal(SIGTERM, cleanup);
|
||||
|
||||
engine_pid = fork();
|
||||
if (engine_pid == 0) {
|
||||
/*
 * start_engine - fork and exec the engine binary.
 *
 * Returns the child's PID in the parent, or -1 if fork() failed (the error
 * is reported with perror). The child replaces itself with
 * ./engine/looper; if the exec fails it reports the error and terminates
 * with status 1 via _exit() so it never returns into the parent's code.
 */
static pid_t start_engine(void) {
    pid_t pid = fork();

    if (pid == -1) {
        perror("fork engine");
        return -1;
    }

    if (pid == 0) {
        /*
         * The argument list terminator must be a null *pointer*: in a
         * variadic call a bare NULL may be passed as an int, so cast it.
         */
        execl("./engine/looper", "looper", (char *)NULL);
        perror("execl engine");
        _exit(1);
    }

    return pid;
}
|
||||
|
||||
client_pid = fork();
|
||||
if (client_pid == 0) {
|
||||
static pid_t start_client(int argc, char *argv[]) {
|
||||
pid_t pid = fork();
|
||||
if (pid == -1) {
|
||||
perror("fork client");
|
||||
return -1;
|
||||
}
|
||||
if (pid == 0) {
|
||||
if (argc > 2 && strcmp(argv[1], "-s") == 0) {
|
||||
execl("./client/looper-client", "looper-client", "-s", argv[2], NULL);
|
||||
} else {
|
||||
@@ -37,15 +61,85 @@ int main(int argc, char *argv[]) {
|
||||
perror("execl client");
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
int status;
|
||||
pid_t exited = wait(&status);
|
||||
if (exited == engine_pid) {
|
||||
kill(client_pid, SIGTERM);
|
||||
wait(NULL);
|
||||
} else if (exited == client_pid) {
|
||||
kill(engine_pid, SIGTERM);
|
||||
wait(NULL);
|
||||
}
|
||||
return 0;
|
||||
return pid;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
signal(SIGINT, cleanup);
|
||||
signal(SIGTERM, cleanup);
|
||||
|
||||
int i;
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "--debug") == 0) {
|
||||
setenv("LOOPER_DEBUG", "1", 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int attempt = 0;
|
||||
const int MAX_ATTEMPTS = 3;
|
||||
|
||||
while (attempt < MAX_ATTEMPTS) {
|
||||
attempt++;
|
||||
|
||||
engine_pid = start_engine();
|
||||
if (engine_pid == -1) {
|
||||
if (attempt >= MAX_ATTEMPTS) {
|
||||
fprintf(stderr, "Failed to start engine after %d attempts\n", MAX_ATTEMPTS);
|
||||
return 1;
|
||||
}
|
||||
usleep(500000);
|
||||
continue;
|
||||
}
|
||||
|
||||
client_pid = start_client(argc, argv);
|
||||
if (client_pid == -1) {
|
||||
kill(engine_pid, SIGTERM);
|
||||
waitpid(engine_pid, NULL, 0);
|
||||
if (attempt >= MAX_ATTEMPTS) {
|
||||
fprintf(stderr, "Failed to start client after %d attempts\n", MAX_ATTEMPTS);
|
||||
return 1;
|
||||
}
|
||||
usleep(500000);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Both children have started. Wait for either to exit. */
|
||||
int status;
|
||||
pid_t exited = waitpid(-1, &status, 0);
|
||||
pid_t other = 0;
|
||||
if (exited == engine_pid) {
|
||||
other = client_pid;
|
||||
} else if (exited == client_pid) {
|
||||
other = engine_pid;
|
||||
} else {
|
||||
/* unexpected waitpid failure */
|
||||
terminate_children();
|
||||
wait_children();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Kill the other child now that one has exited. */
|
||||
if (other > 0) {
|
||||
kill(other, SIGTERM);
|
||||
waitpid(other, NULL, 0);
|
||||
}
|
||||
|
||||
/* Normal clean exit (zero status) means we are done. */
|
||||
if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (attempt >= MAX_ATTEMPTS) {
|
||||
fprintf(stderr, "Child exited abnormally after %d attempts. Quitting.\n",
|
||||
MAX_ATTEMPTS);
|
||||
return 1;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Child exited abnormally, retrying...\n");
|
||||
usleep(500000);
|
||||
/* loop back to try another fresh start */
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user