1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
|
/*
* Testing framework for PID namespace translation
*
* Copyright (c) 2020 Ákos Uzonyi <uzonyi.akos@gmail.com>
* Copyright (c) 2020-2022 The strace developers.
* All rights reserved.
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "tests.h"
#include "pidns.h"
#include <linux/nsfs.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <signal.h>
#include <stdlib.h>
#include <sched.h>
#include <unistd.h>
#include <sys/wait.h>
#include <linux/sched.h>
#include <fcntl.h>
#include <sys/ioctl.h>
/*
 * Set to true by pidns_test_init(); while false, pidns_print_leader()
 * prints nothing.
 */
static bool pidns_translation = false;
/*
 * Set to true by pidns_test_init() once unshare() has succeeded; while
 * false, pidns_pid2str() always returns an empty string.
 */
static bool pidns_unshared = false;
/*
 * Our PIDs in strace's namespace, indexed by enum pid_type.  Filled in by
 * pidns_fork(): the parent computes the values and sends the whole array
 * to the child over a pipe.
 */
static pid_t pidns_strace_ids[PT_COUNT];
/*
 * Print the line prefix used when PID namespace translation is being tested:
 * our TID as seen from strace's PID namespace, left-aligned in a field of at
 * least five characters and followed by a space.  Prints nothing when
 * pidns_translation is not set.
 */
void
pidns_print_leader(void)
{
	if (!pidns_translation)
		return;

	printf("%-5d ", pidns_strace_ids[PT_TID]);
}
/*
 * Return the " /* N in strace's PID NS *\/"-style annotation for the ID of
 * the given type, where N is the value stored in pidns_strace_ids[type].
 *
 * An empty string is returned when type is out of range, when the PID
 * namespace has not been unshared, or when the stored ID is 0.
 *
 * The result points into a static buffer dedicated to that pid type, so it
 * stays valid until the next call with the same type.
 */
const char *
pidns_pid2str(enum pid_type type)
{
	static const char fmt[] = " /* %d in strace's PID NS */";
	/* One slot per pid type; sizeof(int) * 3 leaves room for the digits. */
	static char slots[PT_COUNT][sizeof(fmt) + sizeof(int) * 3];

	/* The range check must come first: type indexes both arrays below. */
	if (type < 0 || type >= PT_COUNT || !pidns_unshared)
		return "";

	const pid_t id = pidns_strace_ids[type];
	if (!id)
		return "";

	char *const out = slots[type];
	snprintf(out, sizeof(slots[type]), fmt, id);
	return out;
}
/**
 * This function is like fork, but does a few more things. It sets up the
 * child's PGID and SID according to the parameters. Also it fills the
 * pidns_strace_ids array in the child's memory with the IDs of the child as
 * seen in the parent's PID namespace: the parent computes them and sends the
 * array through a pipe, and the child blocks reading that pipe, so the
 * parent's setpgid() call has already happened by the time the child returns.
 *
 * In the parent it waits for the child to terminate, but leaves the zombie
 * (WNOWAIT) so that its PID can be used later as a process group. If the
 * child exits with status 77, the test is skipped; if it terminates in any
 * other way than with a zero exit status, the test is failed.
 *
 * @param pgid    The process group the child should be moved to. A positive
 *                value other than the child's own PID is expected to be the
 *                PID of a zombie process (it is reaped here). If negative,
 *                leave the child in the process group of the parent.
 *                If 0, move the child to its own process group.
 * @param new_sid Whether the child should be moved to a new session. If so,
 *                both PT_SID and PT_PGID are recorded as the child's PID.
 * @return 0 in the child, the child's PID in the parent.
 */
static pid_t
pidns_fork(pid_t pgid, bool new_sid)
{
	int strace_ids_pipe[2];
	if (pipe(strace_ids_pipe) < 0)
		perror_msg_and_fail("pipe");

	/* Do not let the child inherit (and later re-emit) buffered output */
	fflush(stdout);

	pid_t pid = fork();
	if (pid < 0)
		perror_msg_and_fail("fork");

	if (!pid) {
		close(strace_ids_pipe[1]);

		/* Blocks until the parent has finished setting us up */
		ssize_t len = read(strace_ids_pipe[0], pidns_strace_ids,
				   sizeof(pidns_strace_ids));
		if (len < 0)
			perror_msg_and_fail("read");
		if (len != sizeof(pidns_strace_ids))
			error_msg_and_fail("read returned < sizeof(pidns_strace_ids)");

		close(strace_ids_pipe[0]);

		/*
		 * A nonzero PT_SID means the parent expects us to lead a new
		 * session.  If setsid() fails, the IDs we were just sent would
		 * not match reality, so treat that as a hard error instead of
		 * silently carrying on.
		 */
		if (pidns_strace_ids[PT_SID]) {
			if (setsid() < 0)
				perror_msg_and_fail("setsid");
		}

		return 0;
	}

	pidns_strace_ids[PT_TID] = pid;
	pidns_strace_ids[PT_TGID] = pid;
	pidns_strace_ids[PT_PGID] = 0;
	pidns_strace_ids[PT_SID] = 0;

	/* pgid == 0 means "make the child the leader of its own group" */
	if (!pgid)
		pgid = pid;

	if (pgid > 0) {
		if (setpgid(pid, pgid) < 0)
			perror_msg_and_fail("setpgid");

		pidns_strace_ids[PT_PGID] = pgid;
	}

	/* Reap group leader to test PGID decoding */
	if (pgid > 0 && pgid != pid) {
		int ret = waitpid(pgid, NULL, WNOHANG);
		if (ret < 0)
			perror_msg_and_fail("wait");
		if (!ret)
			error_msg_and_fail("could not reap group leader");
	}

	if (new_sid) {
		pidns_strace_ids[PT_SID] = pid;
		pidns_strace_ids[PT_PGID] = pid;
	}

	/* Hand the IDs over; this also unblocks the child */
	ssize_t len = write(strace_ids_pipe[1], pidns_strace_ids,
			    sizeof(pidns_strace_ids));
	if (len < 0)
		perror_msg_and_fail("write");
	if (len != sizeof(pidns_strace_ids))
		error_msg_and_fail("write returned < sizeof(pidns_strace_ids)");

	close(strace_ids_pipe[0]);
	close(strace_ids_pipe[1]);

	/* WNOWAIT: leave the zombie, to be able to use it as a process group */
	siginfo_t siginfo;
	if (waitid(P_PID, pid, &siginfo, WEXITED | WNOWAIT) < 0)
		perror_msg_and_fail("wait");

	if (siginfo.si_code != CLD_EXITED || siginfo.si_status) {
		if (siginfo.si_code == CLD_EXITED && siginfo.si_status == 77) {
			error_msg_and_skip("child terminated with skip exit"
					   " status");
		} else {
			error_msg_and_fail("child terminated with nonzero exit"
					   " status");
		}
	}

	return pid;
}
/*
 * Fork a helper child that does nothing but block on the read end of a pipe.
 *
 * The parent closes only the read end and returns, so the write end stays
 * open in the parent.  The child exits with status 0 once read() reports
 * end-of-file, and with status 1 if read() returns anything else.
 *
 * NOTE(review): called right after unshare(CLONE_NEWPID), so this child is
 * presumably meant to serve as the init process of the new PID namespace.
 */
static void
create_init_process(void)
{
	int fds[2];

	if (pipe(fds) < 0)
		perror_msg_and_fail("pipe");

	const pid_t child = fork();
	if (child < 0)
		perror_msg_and_fail("fork");

	if (child > 0) {
		/* Parent: drop the read end, keep the write end open. */
		close(fds[0]);
		return;
	}

	/* Child: the closed write-end slot doubles as a scratch buffer. */
	close(fds[1]);
	const ssize_t got = read(fds[0], &fds[1], sizeof(int));
	_exit(got != 0 ? 1 : 0);
}
/*
 * Probe whether the namespace ioctl commands can be used.
 *
 * Opens /proc/self/ns/pid and issues NS_GET_USERNS on it.  A missing proc
 * file (ENOENT) is reported through perror_msg_and_skip(), any other open
 * failure through perror_msg_and_fail().  An ioctl failure with ENOTTY or
 * EPERM is reported through error_msg_and_skip(), any other ioctl failure
 * through perror_msg_and_fail().  On success both descriptors are closed
 * again before returning.
 */
void
check_ns_ioctl(void)
{
	const int ns_fd = open("/proc/self/ns/pid", O_RDONLY);

	if (ns_fd < 0) {
		if (errno != ENOENT)
			perror_msg_and_fail("opening /proc/self/ns/pid");
		else
			perror_msg_and_skip("opening /proc/self/ns/pid");
	}

	const int userns_fd = ioctl(ns_fd, NS_GET_USERNS);

	if (userns_fd < 0) {
		const int err = errno;

		if (err == ENOTTY) {
			error_msg_and_skip("NS_* ioctl commands are not "
					   "supported by the kernel");
		} else if (err == EPERM) {
			error_msg_and_skip("NS_* ioctl commands are not "
					   "permitted by the kernel");
		} else {
			perror_msg_and_fail("ioctl(NS_GET_USERNS)");
		}
	}

	close(userns_fd);
	close(ns_fd);
}
/*
 * Set up the PID namespace translation test.
 *
 * Each pidns_fork() call below returns 0 in a freshly created child, which
 * then returns to the caller to run the actual test body; the parent gets
 * control back only after that child has terminated and moves on to the next
 * configuration.  The configurations are, in order:
 *
 *   1. a child created before unshare();
 *   2. a child created after unshare(CLONE_NEWUSER | CLONE_NEWPID);
 *   3. a child moved to a new session;
 *   4. a child moved to its own process group;
 *   5. a child moved to the process group of child 4, which is reaped by
 *      pidns_fork() in the process.
 *
 * The original process never returns from this function: it exits with
 * status 0 after the last child is done.
 */
void
pidns_test_init(void)
{
	pidns_translation = true;
	check_ns_ioctl();

	/* 1: before unsharing anything */
	if (pidns_fork(-1, false) == 0)
		return;

	/* Unshare user namespace too, so we do not need to be root */
	const int rc = unshare(CLONE_NEWUSER | CLONE_NEWPID);
	if (rc < 0) {
		if (errno == EPERM)
			perror_msg_and_skip("unshare");

		perror_msg_and_fail("unshare");
	}
	pidns_unshared = true;

	create_init_process();

	/* 2: plain child after unshare */
	if (pidns_fork(-1, false) == 0)
		return;

	/* 3: child in a new session */
	if (pidns_fork(-1, true) == 0)
		return;

	/* 4: child in its own process group */
	const pid_t leader = pidns_fork(0, false);
	if (leader == 0)
		return;

	/* 5: child in the process group of child 4 */
	if (pidns_fork(leader, false) == 0)
		return;

	exit(0);
}
|