首页 > 解决方案 > waitpid() 在特定情况下返回未知错误

问题描述

我创建了一个父进程和一个子进程,然后又从子进程中创建了 3 个孙进程(grandchild process)。

子进程向各个孙进程发送 SIGTERM,并调用 waitpid() 等待它们终止。

对于 3 个孙进程中的一个,waitpid() 返回了未知错误;对于另外 2 个孙进程,waitpid() 没有返回错误。

我编译了代码并运行它。然后,我收到了如下消息。

$ ./a.out
shm_create : shmid = 0
main_process
child process pid = 1953
in grand_chile_process2 : pid = 1955
in grand_child_process2-1
g_child_id->pid[0] = 0
g_child_id->pid[1] = 1955
g_child_id->pid[2] = 0
in process2-2
in grand_child_process1 : pid = 1954
in process1-1
g_child_id->pid[0] = 1954
g_child_id->pid[1] = 1955
g_child_id->pid[2] = 0
in process1-2
in grand_child_process3 : pid = 1956
in process3-1
g_child_id->pid[0] = 1954
g_child_id->pid[1] = 1955
g_child_id->pid[2] = 1956
in process3-2

如果我检查正在运行的进程,我会看到如下 5 个进程。

# ps -eLf | grep a.out
user1       1952    1684    1952  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1953    1952    1953  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1954    1953    1954  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1955    1953    1955  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1956    1954    1956  0    1 13:51 pts/2    00:00:00 ./a.out
root        1958    1885    1958  0    1 13:51 pts/4    00:00:00 grep --color=auto a.out

如果我输入“a”,那么我会看到如下错误消息。

a
Terminating...: pid = 1956
error : waitpid (No child processes)

此时的进程列表如下所示。

# ps -eLf | grep a.out
user1       1952    1684    1952  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1953    1952    1953  0    1 13:51 pts/2    00:00:00 [a.out] <defunct>
user1       1954       1    1954  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1955       1    1955  0    1 13:51 pts/2    00:00:00 ./a.out
user1       1956    1954    1956  0    1 13:51 pts/2    00:00:00 [a.out] <defunct>
root        1960    1885    1960  0    1 13:51 pts/4    00:00:00 grep --color=auto a.out

如下面的源代码所示,在第 218~246 行之间,我先向 grand_child_process3() 发送 SIGTERM,然后依次向 grand_child_process2() 和 grand_child_process1() 发送 SIGTERM。

随后我修改了代码,改为按 grand_child_process1()、grand_child_process2()、grand_child_process3() 的顺序发送 SIGTERM。

重新运行后,可以看到如下输出。

$ ./a.out
shm_create : shmid = 0
main_process
child process pid = 1985
in grand_chile_process2 : pid = 1987
in grand_child_process2-1
g_child_id->pid[0] = 0
g_child_id->pid[1] = 1987
g_child_id->pid[2] = 0
in process2-2
in grand_child_process1 : pid = 1986
in process1-1
g_child_id->pid[0] = 1986
g_child_id->pid[1] = 1987
g_child_id->pid[2] = 0
in process1-2
in grand_child_process3 : pid = 1988
in process3-1
g_child_id->pid[0] = 1986
g_child_id->pid[1] = 1987
g_child_id->pid[2] = 1988
in process3-2

进程列表如下所示。

# ps -eLf | grep a.out
user1       1984    1684    1984  0    1 14:03 pts/2    00:00:00 ./a.out
user1       1985    1984    1985  0    1 14:03 pts/2    00:00:00 ./a.out
user1       1986    1985    1986  0    1 14:03 pts/2    00:00:00 ./a.out
user1       1987    1985    1987  0    1 14:03 pts/2    00:00:00 ./a.out
user1       1988    1986    1988  0    1 14:03 pts/2    00:00:00 ./a.out
root        1990    1885    1990  0    1 14:03 pts/4    00:00:00 grep --color=auto a.out

然后,再次按下“a”,输出如下所示。

a
Terminating...: pid = 1986
pid = 1986
child process[1986] killed by signal 15 (Terminated)
Terminating...: pid = 1987
pid = 1987
child process[1987] killed by signal 15 (Terminated)
Terminating...: pid = 1988
error : waitpid (No child processes)

然后,进程列表变成了这样。

# ps -eLf | grep a.out
user1       1984    1684    1984  0    1 14:03 pts/2    00:00:00 ./a.out
user1       1985    1984    1985  0    1 14:03 pts/2    00:00:00 [a.out] <defunct>
root        1992    1885    1992  0    1 14:05 pts/4    00:00:00 grep --color=auto a.out

我的问题是为什么 grand_child_process3() 没有正确终止。

如果您发现我做错了什么,请告诉我。这是我正在使用的完整源代码。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <error.h>
#include <errno.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/types.h>
#include <sys/wait.h>

/* System V IPC key passed to shmget() in shared_memory_create(). */
#define KEY_NUM 1234

/* Each grandchild stores its own getpid() result in the matching variable. */
pid_t g_child1, g_child2, g_child3;
/* Shared-memory segment id; assigned by shared_memory_create() and passed to shmat(). */
int shmid;

/*
 * Layout of the shared-memory segment: one pid slot per grandchild.
 * Slots 0, 1 and 2 are written by grand_child_process1(), grand_child_process2()
 * and grand_child_process3() respectively.
 */
struct _process_id {
   pid_t pid[3];
};

typedef struct _process_id G_CHILD_ID_t;

/*
 * Create (or open) the System V shared-memory segment identified by KEY_NUM
 * and zero its contents.
 *
 * The segment id is stored in the global `shmid` so that processes forked
 * afterwards can attach with shmat(). The segment is attached here only long
 * enough to clear it and is detached again before returning.
 *
 * Returns 0 on success. On any failure the reason (errno text) is written to
 * stderr and the process exits with EXIT_FAILURE.
 */
int shared_memory_create(void)
{
   void *shared_memory;

   shmid = shmget((key_t)KEY_NUM, sizeof(G_CHILD_ID_t), IPC_CREAT|0666);
   if (shmid == -1)
   {
      perror("shmget failed");
      exit(EXIT_FAILURE);
   }
   printf("shm_create : shmid = %d\n", shmid);

   shared_memory = shmat(shmid, NULL, 0);
   if (shared_memory == (void *)-1)
   {
      perror("shmat failed");
      exit(EXIT_FAILURE);
   }

   /* Start every run with all pid slots reading 0. */
   memset(shared_memory, 0x0, sizeof(G_CHILD_ID_t));

   if (shmdt(shared_memory) == -1)
   {
      perror("shmdt failed");
      exit(EXIT_FAILURE);
   }

   return 0;
}

/*
 * Body of the first grandchild process.
 *
 * Records its own pid in the global g_child1 and in slot 0 of the shared
 * G_CHILD_ID_t segment, prints all three slots as they look at that moment
 * (slots of grandchildren that have not written yet may still read 0),
 * detaches the segment and then sleeps in a loop until the process is killed.
 *
 * Never returns. If shmat() or shmdt() fails, the reason is printed to stderr
 * and the process exits with EXIT_FAILURE.
 */
int grand_child_process1(void)
{
   G_CHILD_ID_t *g_child_id;
   void *shared_memory;
   int i;

   g_child1 = getpid();
   printf("in grand_child_process1 : pid = %d\n", (int)g_child1);

   shared_memory = shmat(shmid, NULL, 0);
   if (shared_memory == (void *)-1)
   {
      perror("shmat failed");
      exit(EXIT_FAILURE);
   }

   g_child_id = (G_CHILD_ID_t *)shared_memory;
   g_child_id->pid[0] = g_child1;
   printf("in process1-1\n");
   for (i = 0; i < 3; i++)
   {
      printf("g_child_id->pid[%d] = %d\n", i, (int)g_child_id->pid[i]);
   }

   if (shmdt(shared_memory) == -1)
   {
      perror("shmdt failed");
      exit(EXIT_FAILURE);
   }

   printf("in process1-2\n");

   /* Idle until a signal (e.g. SIGTERM from the parent) ends the process. */
   while (1)
   {
      sleep(1);
   }
}

/*
 * Body of the second grandchild process.
 *
 * Records its own pid in the global g_child2 and in slot 1 of the shared
 * G_CHILD_ID_t segment, prints all three slots as they look at that moment
 * (slots of grandchildren that have not written yet may still read 0),
 * detaches the segment and then sleeps in a loop until the process is killed.
 *
 * Never returns. If shmat() or shmdt() fails, the reason is printed to stderr
 * and the process exits with EXIT_FAILURE.
 */
int grand_child_process2(void)
{
   G_CHILD_ID_t *g_child_id;
   void *shared_memory;
   int i;

   g_child2 = getpid();
   printf("in grand_chile_process2 : pid = %d\n", (int)g_child2);

   shared_memory = shmat(shmid, NULL, 0);
   if (shared_memory == (void *)-1)
   {
      perror("shmat failed");
      exit(EXIT_FAILURE);
   }

   g_child_id = (G_CHILD_ID_t *)shared_memory;
   g_child_id->pid[1] = g_child2;
   printf("in grand_child_process2-1\n");
   for (i = 0; i < 3; i++)
   {
      printf("g_child_id->pid[%d] = %d\n", i, (int)g_child_id->pid[i]);
   }

   if (shmdt(shared_memory) == -1)
   {
      perror("shmdt failed");
      exit(EXIT_FAILURE);
   }

   printf("in process2-2\n");

   /* Idle until a signal (e.g. SIGTERM from the parent) ends the process. */
   while (1)
   {
      sleep(1);
   }
}

/*
 * Body of the third grandchild process.
 *
 * Records its own pid in the global g_child3 and in slot 2 of the shared
 * G_CHILD_ID_t segment, prints all three slots as they look at that moment,
 * detaches the segment and then sleeps in a loop until the process is killed.
 *
 * Never returns. If shmat() or shmdt() fails, the reason is printed to stderr
 * and the process exits with EXIT_FAILURE.
 */
int grand_child_process3(void)
{
   G_CHILD_ID_t *g_child_id;
   void *shared_memory;
   int i;

   g_child3 = getpid();
   printf("in grand_child_process3 : pid = %d\n", (int)g_child3);

   shared_memory = shmat(shmid, NULL, 0);
   if (shared_memory == (void *)-1)
   {
      perror("shmat failed");
      exit(EXIT_FAILURE);
   }

   g_child_id = (G_CHILD_ID_t *)shared_memory;
   g_child_id->pid[2] = g_child3;
   printf("in process3-1\n");
   for (i = 0; i < 3; i++)
   {
      printf("g_child_id->pid[%d] = %d\n", i, (int)g_child_id->pid[i]);
   }

   if (shmdt(shared_memory) == -1)
   {
      perror("shmdt failed");
      exit(EXIT_FAILURE);
   }

   printf("in process3-2\n");

   /* Idle until a signal (e.g. SIGTERM from the parent) ends the process. */
   while (1)
   {
      sleep(1);
   }
}

/*
 * Print a human-readable description of a wait status.
 *
 * pid    - process id the status belongs to; printed as-is.
 * status - status word filled in by waitpid().
 *
 * Always prints "pid = <pid>", followed by one line describing whether the
 * process exited, was killed by a signal, was stopped, or was continued.
 * The "continued" case matters because callers in this file wait with
 * WCONTINUED, so such a status can be delivered.
 */
void printWaitStatus(pid_t pid, int status)
{
   printf("pid = %d\n", (int)pid);
   if (WIFEXITED(status))
   {
      printf("child process[%d] exited, status = %d\n", (int)pid, WEXITSTATUS(status));
   }
   else if (WIFSIGNALED(status))
   {
      printf("child process[%d] killed by signal %d (%s)\n", (int)pid, WTERMSIG(status), strsignal(WTERMSIG(status)));
   }
   else if (WIFSTOPPED(status))
   {
      printf("child process[%d] stopped by signal %d (%s)\n", (int)pid, WSTOPSIG(status), strsignal(WSTOPSIG(status)));
   }
#ifdef WIFCONTINUED
   else if (WIFCONTINUED(status))
   {
      printf("child process[%d] continued\n", (int)pid);
   }
#endif
}

int child_process(void)
{
   G_CHILD_ID_t *g_child_id;
   void *shared_memory;
   int wstatus;
   int err;

   pid_t p1, p2;
   char ch;

   printf("child process pid = %d\n", getpid());

   p1 = fork();
   p2 = fork();

   if (p1 == 0 && p2 > 0)
   {
      grand_child_process1();
   }
   else if (p1 > 0 && p2 == 0)
   {
      grand_child_process2();
   }
   else if (p1 == 0 && p2 == 0)
   {
      grand_child_process3();
   }

   shared_memory = shmat(shmid, NULL, 0);
   if (shared_memory == (void *)-1)
   {
      printf("shmat failed\n");
      exit(0);
   }

   g_child_id = (G_CHILD_ID_t *)shared_memory;

   while (1)
   {
      ch = (char)getchar();
      if (ch == 'a')
      {
         printf("Terminating...: pid = %d\n", g_child_id->pid[2]);
         kill(g_child_id->pid[2], SIGTERM);
         err = waitpid(g_child_id->pid[2], &wstatus, WUNTRACED | WCONTINUED);
         if (err == -1)
         {
            printf("error : waitpid (%s)\n", strerror(errno));
            exit(EXIT_FAILURE);
         }
         printWaitStatus(g_child_id->pid[2], wstatus);

         printf("Terminating...: pid = %d\n", g_child_id->pid[1]);
         kill(g_child_id->pid[1], SIGTERM);
         err = waitpid(g_child_id->pid[1], &wstatus, WUNTRACED | WCONTINUED);
         if (err == -1)
         {
            printf("error : waitpid (%s)\n", strerror(errno));
            exit(EXIT_FAILURE);
         }
         printWaitStatus(g_child_id->pid[1], wstatus);

         printf("Terminating...: pid = %d\n", g_child_id->pid[0]);
         kill(g_child_id->pid[0], SIGTERM);
         err = waitpid(g_child_id->pid[0], &wstatus, WUNTRACED | WCONTINUED);
         if (err == -1)
         {
            printf("error : waitpid (%s)\n", strerror(errno));
            exit(EXIT_FAILURE);
         }
         printWaitStatus(g_child_id->pid[0], wstatus);

         if(shmdt(shared_memory) == -1)
         {
            printf("shmdt failed\n");
            exit(0);
         }
      }
      else
         sleep(1);
   }
   return 0;
}

/*
 * Body of the top-level (main) process: announce itself, then idle forever.
 *
 * Once per second it also collects any child that has terminated, so that an
 * exited child does not stay behind as a <defunct> (zombie) entry in the
 * process table.
 *
 * Never returns.
 */
int main_process(void)
{
   printf("main_process \n");
   while (1)
   {
      /* Non-blocking: reap every child that has already exited, if any. */
      while (waitpid(-1, NULL, WNOHANG) > 0)
      {
         /* nothing to do; the exit status is not needed */
      }
      sleep(1);
   }
}

/*
 * Program entry point.
 *
 * Creates and clears the shared-memory segment, then forks once: the child
 * runs child_process() and the parent runs main_process().
 *
 * Returns EXIT_FAILURE if fork() fails; otherwise the child returns whatever
 * child_process() returns, and the parent returns 0 should main_process()
 * ever return.
 */
int main(void)
{
   pid_t pid;

   shared_memory_create();

   /* Flush so that text buffered before the fork is not emitted twice. */
   fflush(stdout);

   pid = fork();
   if (pid == -1)
   {
      perror("fork failed");
      return EXIT_FAILURE;
   }

   if (pid == 0)
   {
      return child_process();
   }

   main_process();
   return 0;
}

标签: c linux process waitpid sigterm

解决方案


推荐阅读