Re: open_posix_testsuite: STOP + CONT + wait hang?

From: naresh kamboju
Date: Wed Jun 17 2009 - 04:35:53 EST


Hi Oleg,

please find inline comments

On Sat, Jun 6, 2009 at 12:17 PM, Oleg Nesterov<oleg@xxxxxxxxxx> wrote:
> (change the subject)
>
> On 06/05, naresh kamboju wrote:
>>
>> I want to inform 2.6.29 signal issues,
>> As per my understanding I have noticed that if there is a delay
>> (sleep/nanosleep/usleep) in the child process. Child could not
>> reporting exit status to parent at this situation parent is waiting
>> for ever by combinations of SIGSTOP and SIGCONT. So test cases are
>> reporting as HUNG.
>
> Thanks for report, but please provide more info.
>
>> ARCH: ARM
>
> is it ARM specific? I can't reproduce the problem on x86.
yes it is specific to ARM. test cases are PASSED on X86-2.6.29 Kernels.
>
>> KERNEL: 2.6.29.1
>
> did you try other kernel versions?
i have tried on 2.6.23 kernels it is PASSED on both ARM and X68.
>
>> #define SLEEPSEC 5
> ...
>>       if ((pid = fork()) == 0) {
>>               /* child here */
>>               struct timespec tssleep;
>>
>>               tssleep.tv_sec=SLEEPSEC;
>>               tssleep.tv_nsec=0;
>>               if (clock_nanosleep(CLOCK_REALTIME, 0, &tssleep, NULL) == 0) {
>>                       printf("clock_nanosleep() returned success\n");
>>                       return CHILDPASS;
>>               } else {
>>                       printf("clock_nanosleep() did not return success\n");
>>                       return CHILDFAIL;
>>               }
>>               return CHILDFAIL;
>>       } else {
>>               /* parent here */
>>               int i;
>>
>>               sleep(1);
>>
>>               if (kill(pid, SIGSTOP) != 0) {
>>                       printf("Could not raise SIGSTOP\n");
>>                       return PTS_UNRESOLVED;
>>               }
>>
>>               if (kill(pid, SIGCONT) != 0) {
>>                       printf("Could not raise SIGCONT\n");
>>                       return PTS_UNRESOLVED;
>>               }
>>
>>               if (wait(&i) == -1) {
>
> And I guess it hangs here, right?
yes.
it is hangs here.
wait4(-1, Process 464 suspended

>
> The child should sleep SLEEPSEC seconds then exit, so the whole
> test-case should take SLEEPSEC seconds too.
yes. that is the expected.
>
>
> Do you mean it really hangs and never completes?
yes. it is hangs and never completes.
>
> Can you confirm it hangs in wait() ?
yes. wait4() syscall is waiting for child status.

>
> Does the child print "returned success" ?
it is not notifying parent about its exit status.

>
> If you can reproduce the problem, please send the content of
> /proc/CHILD_PID/status and /proc/PARENT_PID/status.

i have attached strace and proc_log
please review the same.

Best regards
Naresh Kamboju
>
> Oleg.
execve("./3-2.test", ["./3-2.test"], [/* 10 vars */]) = 0
brk(0) = 0x11000
uname({sys="Linux", node="43.88.101.211", ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x4001d000
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
open("/devel/etc/ld.so.cache", O_RDONLY) = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=13962, ...}) = 0
mmap2(NULL, 13962, PROT_READ, MAP_PRIVATE, 3, 0) = 0x4001e000
close(3) = 0
open("/devel/lib/libpthread.so.0", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0008B\0\0004\0\0\0\200"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=805190, ...}) = 0
mmap2(NULL, 127472, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x40026000
mprotect(0x4003a000, 32768, PROT_NONE) = 0
mmap2(0x40042000, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x14) = 0x40042000
mmap2(0x40044000, 4592, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x40044000
close(3) = 0
open("/devel/lib/librt.so.1", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0\240\26\0\0004\0\0\0\4"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=163621, ...}) = 0
mmap2(NULL, 57876, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x40046000
mprotect(0x4004c000, 28672, PROT_NONE) = 0
mmap2(0x40053000, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5) = 0x40053000
close(3) = 0
open("/devel/lib/libm.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0\2601\0\0004\0\0\0\324"...,
512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=1349258, ...}) = 0
mmap2(NULL, 688288, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3,
0) = 0x40055000
mprotect(0x400f5000, 28672, PROT_NONE) = 0
mmap2(0x400fc000, 8192, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x9f) = 0x400fc000
close(3) = 0
open("/devel/lib/libc.so.6", O_RDONLY) = 3
read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0(\0\1\0\0\0
R\1\0004\0\0\0\204"..., 512) = 512
fstat64(3, {st_mode=S_IFREG|0755, st_size=7855285, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x40022000
mmap2(NULL, 1249800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE,
3, 0) = 0x400fe000
mprotect(0x40223000, 28672, PROT_NONE) = 0
mmap2(0x4022a000, 12288, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x124) = 0x4022a000
mmap2(0x4022d000, 8712, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x4022d000
close(3) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x40023000
set_tls(0x40022c80, 0x40022c80, 0x67c, 0x40023358, 0x40025048) = 0
mprotect(0x4022a000, 8192, PROT_READ) = 0
mprotect(0x400fc000, 4096, PROT_READ) = 0
mprotect(0x40053000, 4096, PROT_READ) = 0
mprotect(0x40042000, 4096, PROT_READ) = 0
mprotect(0x40024000, 4096, PROT_READ) = 0
munmap(0x4001e000, 13962) = 0
set_tid_address(0x40022828) = 464
set_robust_list(0x40022830, 0xc) = 0
rt_sigaction(SIGRTMIN, {0x4002a128, [], SA_SIGINFO|0x4000000}, NULL, 8) = 0
rt_sigaction(SIGRT_1, {0x40029ce8, [],
SA_RESTART|SA_SIGINFO|0x4000000}, NULL, 8) = 0
rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM_INFINITY}) = 0
clock_gettime(CLOCK_REALTIME, {1245240048, 291313608}) = 0
clone(Process 465 attached (waiting for parent)
Process 465 resumed (parent 464 ready)
child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD,
child_tidptr=0x40022828) = 465
[pid 465] nanosleep({5, 0}, <unfinished ...>
[pid 464] rt_sigprocmask(SIG_BLOCK, [CHLD], [], 8) = 0
[pid 464] rt_sigaction(SIGCHLD, NULL, {SIG_DFL, [], 0}, 8) = 0
[pid 464] rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
[pid 464] nanosleep({1, 0}, {1, 0}) = 0
[pid 464] kill(465, SIGSTOP <unfinished ...>
[pid 465] <... nanosleep resumed> 0) = ? ERESTART_RESTARTBLOCK (To
be restarted)
[pid 465] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 465] --- SIGSTOP (Stopped (signal)) @ 0 (0) ---
[pid 464] <... kill resumed> ) = 0
[pid 464] kill(465, SIGCONT) = 0
[pid 464] wait4(-1, Process 464 suspended
<unfinished ...>
[pid 465] --- SIGCONT (Continued) @ 0 (0) ---


/**************************************************************/

-bash-3.2# uname -a
Linux 2.6.29.2
-bash-3.2# cat proc/464/status
Name: 3-2.test
State: T (tracing stop)
Tgid: 464
Pid: 464
PPid: 462
TracerPid: 462
Uid: 0 0 0 0
Gid: 0 0 0 0
FDSize: 32
Groups: 0
VmPeak: 2332 kB
VmSize: 2316 kB
VmLck: 0 kB
VmHWM: 404 kB
VmRSS: 404 kB
VmData: 32 kB
VmStk: 84 kB
VmExe: 4 kB
VmLib: 2032 kB
VmPTE: 8 kB
Threads: 1
SigQ: 0/1024
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: 0000000180000000
CapInh: 0000000000000000
CapPrm: fffffffffffffeff
CapEff: fffffffffffffeff
CapBnd: fffffffffffffeff
voluntary_ctxt_switches: 126
nonvoluntary_ctxt_switches: 31
-bash-3.2#
-bash-3.2# cat proc/465/status
Name: 3-2.test
State: R (running)
Tgid: 465
Pid: 465
PPid: 464
TracerPid: 462
Uid: 0 0 0 0
Gid: 0 0 0 0
FDSize: 32
Groups: 0
VmPeak: 2316 kB
VmSize: 2316 kB
VmLck: 0 kB
VmHWM: 164 kB
VmRSS: 164 kB
VmData: 32 kB
VmStk: 84 kB
VmExe: 4 kB
VmLib: 2032 kB
VmPTE: 8 kB
Threads: 1
SigQ: 0/1024
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: 0000000180000000
CapInh: 0000000000000000
CapPrm: fffffffffffffeff
CapEff: fffffffffffffeff
CapBnd: fffffffffffffeff
voluntary_ctxt_switches: 7
nonvoluntary_ctxt_switches: 36623
-bash-3.2#



>
>
-bash-3.2# uname -a
Linux 2.6.29.2
-bash-3.2# cat proc/464/status
Name: 3-2.test
State: T (tracing stop)
Tgid: 464
Pid: 464
PPid: 462
TracerPid: 462
Uid: 0 0 0 0
Gid: 0 0 0 0
FDSize: 32
Groups: 0
VmPeak: 2332 kB
VmSize: 2316 kB
VmLck: 0 kB
VmHWM: 404 kB
VmRSS: 404 kB
VmData: 32 kB
VmStk: 84 kB
VmExe: 4 kB
VmLib: 2032 kB
VmPTE: 8 kB
Threads: 1
SigQ: 0/1024
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: 0000000180000000
CapInh: 0000000000000000
CapPrm: fffffffffffffeff
CapEff: fffffffffffffeff
CapBnd: fffffffffffffeff
voluntary_ctxt_switches: 126
nonvoluntary_ctxt_switches: 31
-bash-3.2#
-bash-3.2# cat proc/465/status
Name: 3-2.test
State: R (running)
Tgid: 465
Pid: 465
PPid: 464
TracerPid: 462
Uid: 0 0 0 0
Gid: 0 0 0 0
FDSize: 32
Groups: 0
VmPeak: 2316 kB
VmSize: 2316 kB
VmLck: 0 kB
VmHWM: 164 kB
VmRSS: 164 kB
VmData: 32 kB
VmStk: 84 kB
VmExe: 4 kB
VmLib: 2032 kB
VmPTE: 8 kB
Threads: 1
SigQ: 0/1024
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000000
SigCgt: 0000000180000000
CapInh: 0000000000000000
CapPrm: fffffffffffffeff
CapEff: fffffffffffffeff
CapBnd: fffffffffffffeff
voluntary_ctxt_switches: 7
nonvoluntary_ctxt_switches: 36623
-bash-3.2#

Attachment: nano_sleep_3-2.log
Description: Binary data