首页 > 解决方案 > 为什么子进程中的管道有时会中断,有时不会?


我正在使用 Perl 开发一个带有主脚本的系统,该脚本需要执行几个不同的工作脚本,这些脚本同时在系统上执行不同的任务。
这些脚本也不需要用 Perl 编写。它们可以是任何能在命令行上执行、并通过 STDOUT、STDERR 和 EXITCODE 返回结果的程序。



# Worker script used as a test fixture: it interleaves STDERR and STDOUT
# output with pauses so the master script has something to monitor.

# First message on STDERR, written before any STDOUT output.
print STDERR "HELLO ERROR 0 !\n";

print "hello script!!\n";

print "hello waiting 3 sec ...\n";

# Pause for 3 seconds before producing further output.
sleep 3;

print "hello list:\n";
print "cmd: 'ls -lah'\n";

# Run `ls -lah` through backticks and forward its captured STDOUT.
print `ls -lah`;

# $? holds the wait status of the backtick command above.
print "res: '$?'\n";    

print "hello waiting 2 sec ...\n";

# Pause for 2 more seconds before the final STDERR message.
sleep 2;    

# Last message on STDERR, written after the pauses above.
print STDERR "HELLO ERROR 1 !\n";


Master Script 使用 fork 以非阻塞模式执行 Worker Script,并使用 waitpid 和 select 监视 Worker Script 的进度。



use strict;
use warnings;

use IO::Select;
use POSIX ":sys_wait_h";

# Launch - run the command configured in $rprocess->{"file"} in a forked child.
#
# Two pipes are created before forking: one carries the child's STDOUT, the
# other its STDERR. On success the parent stores the child PID in
# $rprocess->{"pid"} and the read ends of the pipes in
# $rprocess->{"log_pipe"} and $rprocess->{"error_pipe"}.
#
# Parameters:
#   $rprocess - hash reference describing the process; the keys "name" and
#               "file" are read, "pid", "log_pipe" and "error_pipe" are written.
#
# Returns 1 in the parent when the child was started, 0 when the pipes could
# not be created or fork() failed. The child never returns; it exits with the
# exit code of the executed command.
sub Launch
{
  my $rprocess = shift;

  my ($logreader, $logwriter, $errorreader, $errorwriter);

  unless(pipe($logreader, $logwriter))
  {
    print "ERROR: Sub Process '" . $rprocess->{"name"} . "' Launch failed: $!\n";
    return 0;
  }

  unless(pipe($errorreader, $errorwriter))
  {
    # Keep the message before close() can overwrite $!
    my $serrmsg = "$!";

    close $logreader;
    close $logwriter;

    print "ERROR: Sub Process '" . $rprocess->{"name"} . "' Launch failed: $serrmsg\n";
    return 0;
  }

  #Spawn the Child Process
  my $iprcpid = fork();

  unless(defined $iprcpid)
  {
    # Unable to fork: release all four pipe ends so they do not leak
    my $serrmsg = "$!";

    close $_ foreach ($logreader, $logwriter, $errorreader, $errorwriter);

    print "ERROR: Sub Process '" . $rprocess->{"name"} . "' Launch failed: $serrmsg\n";
    return 0;
  }

  if($iprcpid == 0)
  {
    #Child Process

    # The child only writes; the read ends belong to the parent
    close $logreader;
    close $errorreader;

    # Duplicate the write ends onto STDOUT / STDERR, then drop the originals
    # so that each pipe has exactly one writer descriptor in this process.
    unless(open(STDOUT, ">&", $logwriter))
    {
      print STDERR "ERROR: Sub Process '" . $rprocess->{"name"}
        . "': STDOUT redirection failed: $!\n";
      exit 1;
    }

    unless(open(STDERR, ">&", $errorwriter))
    {
      # STDERR could not be redirected, so it still points to the inherited one
      print STDERR "ERROR: Sub Process '" . $rprocess->{"name"}
        . "': STDERR redirection failed: $!\n";
      exit 1;
    }

    close $logwriter;
    close $errorwriter;

    #Execute the configured Command

    print "cmd: '" . $rprocess->{"file"} . "'\n";

    print "cmd rng ...\n";

    # NOTE(review): the command string is handed to the shell as-is; it must
    # come from trusted configuration only.
    print `$rprocess->{"file"}`;

    my $ierr = $?;

    print "closing transmission ...\n";

    close STDOUT;
    close STDERR;

    # $? is a wait status, not an exit code: the exit code lives in the high
    # byte and the terminating signal in the low 7 bits. Passing it to exit()
    # unchanged would turn e.g. 256 (exit code 1) into exit status 0.
    my $iexitcode = $ierr == -1    ? 127                    # command could not be started
                  : ($ierr & 127)  ? 128 + ($ierr & 127)    # killed by a signal
                  :                  $ierr >> 8;            # regular exit code

    exit $iexitcode;
  }

  #Parent Process

  # The parent only reads. Keeping the write ends open here would prevent
  # the readers from ever seeing end-of-file.
  close $logwriter;
  close $errorwriter;

  $rprocess->{"pid"} = $iprcpid;
  $rprocess->{"log_pipe"} = $logreader;
  $rprocess->{"error_pipe"} = $errorreader;

  return 1;
}

# Read - collect whatever output is currently available from a sub process.
#
# On the first call an IO::Select object is created for the two pipes stored
# in $rprocess->{"log_pipe"} and $rprocess->{"error_pipe"} and kept in
# $rprocess->{"select"} for later calls. Data read from the log pipe is
# appended to $rprocess->{"log"}, data from the error pipe to
# $rprocess->{"error"}. A pipe is removed from the selector once it reports
# end-of-file or a read error.
#
# Each can_read() call waits up to 1 second; the loop ends as soon as no
# handle becomes readable within that time.
#
# Parameters:
#   $rprocess - hash reference describing the process (see above for the keys).
sub Read
{
  my $rprocess = shift;

  my $ppsel = $rprocess->{"select"};
  my $pplog = $rprocess->{"log_pipe"};
  my $pperr = $rprocess->{"error_pipe"};

  unless(defined $ppsel)
  {
    $ppsel = IO::Select->new();

    # Register only the pipes that actually exist
    $ppsel->add(grep { defined } $pplog, $pperr);

    #Store the Selector Object
    $rprocess->{"select"} = $ppsel;
  }  #unless(defined $ppsel)

  while(my @arrppselrdy = $ppsel->can_read(1))
  {
    foreach my $pprdy (@arrppselrdy)
    {
      my $srdln = "";
      my $irdcnt = sysread($pprdy, $srdln, 8192);

      unless(defined $irdcnt)
      {
        #Reading from the Pipe failed

        # An interrupted call is not a failure; the handle is tried again
        next if($!{EINTR});

        # Capture the error before any other call can overwrite $!
        my $ierrno = $! + 0;
        my $serrmsg = "$!";

        #Remove the Pipe File Handle
        $ppsel->remove($pprdy);

        $rprocess->{"error"} .= "ERROR: Sub Process " . $rprocess->{"name"}
          . ": Read failed with [" . $ierrno . "]!\n"
          . "Message: '$serrmsg'\n";

        next;
      }  #unless(defined $irdcnt)

      my $ifd = fileno($pprdy);

      if($irdcnt > 0)
      {
        if(defined $pplog && $ifd == fileno($pplog))
        {
          $rprocess->{"log"} .= "pipe (" . $ifd . "): reading report ...\n";

          $rprocess->{"log"} .= $srdln;
        }
        elsif(defined $pperr && $ifd == fileno($pperr))
        {
          $rprocess->{"log"} .= "pipe (" . $ifd . "): reading error ...\n";

          $rprocess->{"error"} .= $srdln;
        }
      }
      else    #End of Transmission
      {
        $rprocess->{"log"} .= "pipe (" . $ifd . "): transmission done.\n";

        #Remove the Pipe File Handle
        # A pipe at end-of-file stays readable forever; without removing it
        # the surrounding loop would never terminate.
        $ppsel->remove($pprdy);
      } #if($irdcnt > 0)
    }  #foreach my $pprdy (@arrppselrdy)
  } #while(my @arrppselrdy = $ppsel->can_read(1))

  $rprocess->{"log"} .= "try read done. '" . $ppsel->count . "' pipes left.\n";
}

# Check - poll a launched sub process without blocking.
#
# Only acts on processes that were launched ("pid" > 0) and have not been
# reaped yet ("status" < 0). It calls waitpid() with WNOHANG, reads pending
# pipe output through Read(), and once the child has finished stores its
# wait status ($?) in $rprocess->{"status"} and closes both pipes.
#
# Parameters:
#   $rprocess - hash reference describing the process; reads "pid", "status",
#               "log_pipe" and "error_pipe", writes "status" and "error".
#
# Returns 1 while the sub process is still running, otherwise 0.
sub Check
{
  my $rprocess = shift;
  my $irng = 0;

  if($rprocess->{"pid"} > 0
    && $rprocess->{"status"} < 0)
  {
    my $ifinishpid = waitpid($rprocess->{"pid"}, WNOHANG);

    if($ifinishpid > -1)
    {
      if($ifinishpid == 0)
      {
        # waitpid() returns 0 with WNOHANG while the child is still alive
        $irng = 1;
      }
      else
      {
        # Child was reaped; $? is only meaningful in this case
        $rprocess->{"status"} = $?;
      }

      #Read the Message Pipes
      Read($rprocess);

      if($ifinishpid > 0)
      {
        # Output was read above, so the pipes can be released now
        close $rprocess->{"log_pipe"} if(defined $rprocess->{"log_pipe"});
        close $rprocess->{"error_pipe"} if(defined $rprocess->{"error_pipe"});
      }
    }
    else  #Sub Process does not exist
    {
      $rprocess->{"error"} .= "ERROR: Sub Process does not exist!\n";
    }  #if($ifinishpid > -1)
  }  #if($rprocess->{"pid"} > 0 && $rprocess->{"status"} < 0)

  return $irng;
}

# Registry of the worker processes, keyed by name. Each entry holds the
# command line ("file"), the runtime handles filled in by Launch() and Read()
# and the collected output. "pid" and "status" start at -1, meaning
# "not launched" and "not finished" respectively.
my %hshprocesses = ("hello1" => {"name" => "hello1", "pid" => -1, "file" => "./hello.pl"
    , "log_pipe" => undef, "error_pipe" => undef, "select" => undef
    , "log" => "", "error" => "", "status" => -1}
  , "hello2" => {"name" => "hello2", "pid" => -1, "file" => "sleep 3 ; ./hello2.pl"
    , "log_pipe" => undef, "error_pipe" => undef, "select" => undef
    , "log" => "", "error" => "", "status" => -1}
  , "hello3" => {"name" => "hello3", "pid" => -1, "file" => "./hello3.pl ; sleep 2"
    , "log_pipe" => undef, "error_pipe" => undef, "select" => undef
    , "log" => "", "error" => "", "status" => -1}
  , "hello4" => {"name" => "hello4", "pid" => -1, "file" => "./hello4.pl"
    , "log_pipe" => undef, "error_pipe" => undef, "select" => undef
    , "log" => "", "error" => "", "status" => -1});

print "prcs launching ...\n";

# Start every configured worker and report only the ones that failed
foreach my $sprcname (keys %hshprocesses)
{
  unless(Launch($hshprocesses{$sprcname}))
  {
    print "prc '" . $hshprocesses{$sprcname}{"name"} . "': Launch failed!\n";
  }
}  #foreach my $sprcname (keys %hshprocesses)

print "prcs launched.\n";

my $irunningcount = 0;

# Poll all workers until none of them reports itself as running.
# do/while guarantees at least one polling pass.
do
{
  $irunningcount = 0;

  foreach my $sprcname (keys %hshprocesses)
  {
    $irunningcount++ if(Check($hshprocesses{$sprcname}));
  }  #foreach my $sprcname (keys %hshprocesses)

  print "prc rng cnt: '$irunningcount'\n";
}
while($irunningcount > 0);

# Final report: PID, wait status and collected output for every worker
foreach my $sprcname (keys %hshprocesses)
{
  my $rprocess = $hshprocesses{$sprcname};

  print "process (" . $rprocess->{"pid"} . ") '" . $rprocess->{"name"} . "':\n";
  print "status [" . $rprocess->{"status"} . "]\n";
  print "log:\n"
    . "'" . $rprocess->{"log"} . "'\n";
  print "error:\n"
    . "'" . $rprocess->{"error"} . "'\n";
}  #foreach my $sprcname (keys %hshprocesses)



$ ./master.pl
prcs launching ...
prcs launched.
prc rng cnt: '4'
prc rng cnt: '3'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '1'
prc rng cnt: '0'
process (2742) 'hello4':
status [0]
'pipe (3): reading report ...
cmd: './hello4.pl'
cmd rng ...
pipe (5): reading error ...
try read done. '2' pipes left.
pipe (5): reading error ...
pipe (3): reading report ...
hello4 script!!
hello waiting 3 sec ...
hello4 list:
cmd: 'ls -lah'
total 24K
drwxr-xr-x  2 bodo bodo   90 may  5 08:23 .
drwxr-xr-x 11 bodo bodo  128 may  4 18:49 ..
-rwxr-xr-x  1 bodo bodo  307 may  4 22:33 hello2.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:37 hello3.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:34 hello4.pl
-rwxr-xr-x  1 bodo bodo  303 may  4 18:50 hello.pl
-rwxr-xr-x  1 bodo bodo 5,7K may  5 08:23 master.pl
res: '0'
hello waiting 2 sec ...
pipe (3): reading report ...
closing transmission ...
pipe (5): transmission done.
pipe (3): transmission done.
try read done. '0' pipes left.
try read done. '0' pipes left.
process (2743) 'hello1':
status [0]
'pipe (4): reading report ...
cmd: 'sleep 3 ; ./hello.pl'
cmd rng ...
try read done. '2' pipes left.
pipe (7): reading error ...
try read done. '2' pipes left.
try read done. '2' pipes left.
pipe (7): reading error ...
pipe (4): reading report ...
hello script!!
hello waiting 3 sec ...
hello list:
cmd: 'ls -lah'
total 24K
drwxr-xr-x  2 bodo bodo   90 may  5 08:23 .
drwxr-xr-x 11 bodo bodo  128 may  4 18:49 ..
-rwxr-xr-x  1 bodo bodo  307 may  4 22:33 hello2.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:37 hello3.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:34 hello4.pl
-rwxr-xr-x  1 bodo bodo  303 may  4 18:50 hello.pl
-rwxr-xr-x  1 bodo bodo 5,7K may  5 08:23 master.pl
res: '0'
hello waiting 2 sec ...
closing transmission ...
pipe (7): transmission done.
pipe (4): transmission done.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
try read done. '0' pipes left.
process (2745) 'hello3':
status [0]
'pipe (6): reading report ...
cmd: './hello3.pl ; sleep 2'
cmd rng ...
pipe (9): reading error ...
try read done. '2' pipes left.
pipe (9): reading error ...
try read done. '2' pipes left.
pipe (6): reading report ...
hello3 script!!
hello waiting 3 sec ...
hello3 list:
cmd: 'ls -lah'
total 24K
drwxr-xr-x  2 bodo bodo   90 may  5 08:23 .
drwxr-xr-x 11 bodo bodo  128 may  4 18:49 ..
-rwxr-xr-x  1 bodo bodo  307 may  4 22:33 hello2.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:37 hello3.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:34 hello4.pl
-rwxr-xr-x  1 bodo bodo  303 may  4 18:50 hello.pl
-rwxr-xr-x  1 bodo bodo 5,7K may  5 08:23 master.pl
res: '0'
hello waiting 2 sec ...
closing transmission ...
pipe (9): transmission done.
pipe (6): transmission done.
try read done. '0' pipes left.
process (2746) 'hello2':
status [0]
'pipe (8): reading report ...
cmd: './hello2.pl'
cmd rng ...
pipe (11): reading error ...
try read done. '2' pipes left.
pipe (8): reading report ...
hello2 script!!
hello waiting 3 sec ...
hello2 list:
cmd: 'ls -lah'
total 24K
drwxr-xr-x  2 bodo bodo   90 may  5 08:23 .
drwxr-xr-x 11 bodo bodo  128 may  4 18:49 ..
-rwxr-xr-x  1 bodo bodo  307 may  4 22:33 hello2.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:37 hello3.pl
-rwxr-xr-x  1 bodo bodo  307 may  4 22:34 hello4.pl
-rwxr-xr-x  1 bodo bodo  303 may  4 18:50 hello.pl
-rwxr-xr-x  1 bodo bodo 5,7K may  5 08:23 master.pl
res: '0'
hello waiting 2 sec ...
closing transmission ...
pipe (11): reading error ...
pipe (8): transmission done.
pipe (11): transmission done.
try read done. '0' pipes left.

但这仅在只有 1 个工作脚本时才有效。
如果 Master Script 尝试读取多个 Worker Script,则在执行 Worker Script hello.pl 之前,传输管道就已被 Child 关闭。最后读取到的是:

        cmd: './hello.pl'
        cmd rng ...


        cmd: './hello.pl'
        cmd rng ...
        pipe (8): transmission done.


有时我仍然能在错误报告中找到 hello.pl 的 STDERR 的第一行。



现在有了这个测试脚本,我无法重现Broken Pipe的错误。
我在一个装有 perl v5.10 的系统上开发了这个应用程序,在 4 个进程中有 3 个正常运行,1 个中断,尤其是当它调用了 sleep 时。


编辑 2018-05-05:我构建了主脚本master.pl并使用 perl v5.24.1 运行它

标签: perlipc


由于没有人能在 1 个月内给我任何有用的答案甚至提示,我将发布我发现的关于断管现象的信息,以帮助可能面临类似问题的其他人。

我添加了足够长的睡眠时间,让我可以使用strace命令跟踪系统活动,它实际上记录了Broken Pipe错误:

$ strace -p 11729
Process 11729 attached
restart_syscall(<... resuming interrupted call ...>) = 0
write(1, "hello script!!\nhello waiting 20 "..., 67) = 67
pipe([3, 4])                            = 0
pipe([5, 6])                            = 0
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fe670d5f9d0) = 13238
close(6)                                = 0
close(4)                                = 0
read(5, "", 4)                          = 0
close(5)                                = 0
ioctl(3, SNDCTL_TMR_TIMEBASE or SNDRV_TIMER_IOCTL_NEXT_DEVICE or TCGETS, 0x7fff334b57b0) = -1 EINVAL (Invalid argument)
lseek(3, 0, SEEK_CUR)                   = -1 ESPIPE (Illegal seek)
read(3, "total 228K\ndrwxr-xr-x.  4 usr15 "..., 4096) = 1868
read(3, "", 4096)                       = 0
close(3)                                = 0
rt_sigaction(SIGHUP, {SIG_IGN, [], SA_RESTORER, 0x7fe66f8e27e0}, {SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGINT, {SIG_IGN, [], SA_RESTORER, 0x7fe66f8e27e0}, {SIG_DFL, [], 0}, 8) = 0
rt_sigaction(SIGQUIT, {SIG_IGN, [], SA_RESTORER, 0x7fe66f8e27e0}, {SIG_DFL, [], 0}, 8) = 0
wait4(13238, [{WIFEXITED(s) && WEXITSTATUS(s) == 0}], 0, NULL) = 13238
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=13238, si_status=0, si_utime=0, si_stime=0} ---
rt_sigaction(SIGHUP, {SIG_DFL, [], SA_RESTORER, 0x7fe66f8e27e0}, NULL, 8) = 0
rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x7fe66f8e27e0}, NULL, 8) = 0
rt_sigaction(SIGQUIT, {SIG_DFL, [], SA_RESTORER, 0x7fe66f8e27e0}, NULL, 8) = 0
rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
nanosleep({30, 0}, 0x7fff334b5b90)      = 0
write(2, "HELLO ERROR 1 !\n", 16)       = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=11729, si_uid=501} ---
+++ killed by SIGPIPE +++

跟踪记录显示,hello.pl 本应正常完成,并在结束时通过 write(2, "HELLO ERROR 1 !\n", 16) 写出错误消息,但该调用返回了 -1。
它发现管道已损坏,产生错误 EPIPE (Broken pipe),并导致进程崩溃:+++ killed by SIGPIPE +++


  • 如果分叉的子进程已经终止



    cmd: './hello.pl'
    cmd rng ...
    pipe (8): transmission done.
  • 源于在分叉的子进程中使用 print()、而在父进程中使用 sysread() 的组合。
    输出实际上先在 Child 端被缓冲,直到最后才写入 Parent 端。


  • 这源于与实际应用程序中实现了 Object::DESTROY 方法的对象机制的组合。
    在实际应用程序中,进程的输出没有被立即读取,这导致这些进程被 Object::DESTROY 的逻辑杀死。
    但是我在执行日志中找不到这方面的记录,因为该 Object 已经被 Perl 的 Garbage Collector 销毁了。

