From 52747cc1752b8a8e35aee47db2f55d3a3c365ce3 Mon Sep 17 00:00:00 2001 From: Keith Owens Date: Fri, 16 Sep 2011 17:55:28 +0200 Subject: workaround for race between waitpid(-1) and waitpid(pid) Sometimes waitpid(-1) in cb_check_data reaps a child that has just terminated, before waitpid(pid) in wait_process can reap it. On Linux, waitpid(pid,,WNOHANG) may then return 0 instead of an error, even though the child has already terminated and been reaped, so wait_process keeps looping, waiting for a child that no longer exists. The symptom is a spurious "timeout waiting for filter command to exit". --- src/proxsmtpd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/proxsmtpd.c b/src/proxsmtpd.c index 278ecee..8f50d96 100644 --- a/src/proxsmtpd.c +++ b/src/proxsmtpd.c @@ -925,6 +925,11 @@ static int wait_process(spctx_t* sp, pid_t pid, int* status) switch(waitpid(pid, status, WNOHANG)) { case 0: + /* Linux may return 0 if the task has already terminated and was + * caught by waitpid(-1) above, double check it still exists. + */ + if (kill(pid, 0) < 0 && errno == ESRCH) + return 0; break; case -1: if(errno != ECHILD && errno != ESRCH) -- cgit v1.2.3