首页 > 解决方案 > 来自不同用户命名空间的能力

问题描述

我正在研究 Linux 中的 POSIX 能力(capabilities)和命名空间。受几篇出色文章的启发,我写了一些代码,以便更好地理解从不同的命名空间观察时这些能力是如何呈现的。部分代码摘自这些文章中的示例,并非我原创……

#define _GNU_SOURCE
#include <unistd.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <sched.h>
#include <sys/capability.h>
#include "caputilities.h"


/* Report msg together with the current errno description (via perror) and
   terminate the process with EXIT_FAILURE. The do { } while (0) wrapper makes
   the macro expand to a single statement. */
#define errExit(msg)    do { perror(msg); exit(EXIT_FAILURE); \
                        } while (0)
/* Capacity, excluding the terminating NUL, of the /proc path buffer. */
#define MAXLEN 255

/* Replace every comma in a mapping string with a newline, in place.
 *
 * map: writable, NUL-terminated mapping string; NULL is accepted and ignored.
 *
 * The string is walked by pointer up to its terminator, so no integer index
 * is compared against a size_t length and strings of any length are handled.
 */
static void get_mapstr(char *map){
    if (map == NULL) {
        return;
    }
    for (char *p = map; *p != '\0'; p++) {
        if (*p == ',') {
            *p = '\n';
        }
    }
}

/* Write a mapping string to an existing map file (the caller passes
 * /proc/PID/uid_map or /proc/PID/gid_map).
 *
 * map:      NUL-terminated mapping text; must not be NULL.
 * map_file: path of the file to open (O_RDWR) and write to.
 *
 * The whole string is handed to a single write() call and is not retried.
 * On an open error, a write error or a short write, a diagnostic is printed
 * to stderr and the process exits with EXIT_FAILURE.
 */
static void save_map(char *map, char *map_file){
    int fd = open(map_file, O_RDWR);
    if (fd == -1) {
        fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
        exit(EXIT_FAILURE);
    }

    size_t map_len = strlen(map);
    ssize_t written = write(fd, map, map_len);
    if (written == -1) {
        int saved_errno = errno;    /* close() may overwrite errno */
        close(fd);
        fprintf(stderr, "write %s: %s\n", map_file, strerror(saved_errno));
        exit(EXIT_FAILURE);
    }
    if ((size_t)written != map_len) {
        /* A short write does not set errno, so report the byte counts. */
        close(fd);
        fprintf(stderr, "write %s: short write (%zd of %zu bytes)\n",
                map_file, written, map_len);
        exit(EXIT_FAILURE);
    }
    close(fd);
}

/* Print the calling process's capability set to stderr, releasing both the
   capability state and its text form afterwards. Failures of the libcap
   calls are reported instead of passing a NULL pointer to fprintf. */
static void report_child_caps(void){
    cap_t caps = cap_get_proc();
    if (caps == NULL) {
        fprintf(stderr, "cap_get_proc: %s\n", strerror(errno));
        return;
    }
    char *text = cap_to_text(caps, NULL);
    if (text == NULL) {
        fprintf(stderr, "cap_to_text: %s\n", strerror(errno));
    } else {
        fprintf(stderr, "child process capabilities %s\n", text);
        cap_free(text);
    }
    cap_free(caps);
}

/* Start function for cloned child.
 *
 * arg: NULL when no user namespace was requested; otherwise a pointer to an
 *      array of two strings, { uid map, gid map }, either of which may be NULL.
 *
 * Prints the child's pid, capabilities and effective IDs. When arg is not
 * NULL it writes the supplied maps to /proc/<pid>/uid_map and
 * /proc/<pid>/gid_map, then prints capabilities and IDs again. It then
 * sleeps and exits with status 0; it never returns to its caller.
 */
static int childFunc(void *arg){
    pid_t pid = getpid();
    fprintf(stderr, "cloned child pid %ld\n", (long)pid);
    report_child_caps();
    fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
    if (arg != NULL){ /* user ns enabled */
        char **maps = arg;
        char *uidmap = maps[0];
        char *gidmap = maps[1];
        if (uidmap != NULL) fprintf(stderr, "setting uid map %s\n", uidmap);
        if (gidmap != NULL) fprintf(stderr, "setting gid map %s\n", gidmap);
        char map_path[MAXLEN + 1];
        if (uidmap != NULL){
            /* Bound by the real buffer size rather than a separate constant. */
            snprintf(map_path, sizeof map_path, "/proc/%ld/uid_map", (long)pid);
            save_map(uidmap, map_path);
        }
        if (gidmap != NULL){
            snprintf(map_path, sizeof map_path, "/proc/%ld/gid_map", (long)pid);
            save_map(gidmap, map_path);
        }
        report_child_caps();
        fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
    }
    /* Stay alive for a while; the parent inspects this process by pid after
       its own one-second sleep. */
    sleep(200);
    exit(0);
}

/* Print the command-line synopsis and option descriptions to stderr, then
   terminate the process with EXIT_FAILURE. pname is the program name shown
   in the synopsis line. */
static void usage(char *pname){
    static const char *const help_lines[] = {
        "       -U use user namespace\n",
        "       -M uid mapping\n",
        "       -G gid mapping\n",
        "       mapstring is a comma separated list of mapping of the form:\n",
        "       ID_inside-ns    ID-outside-ns   length [,ID_inside-ns    ID-outside-ns   length, ...]\n",
    };
    const size_t line_count = sizeof help_lines / sizeof help_lines[0];

    fprintf(stderr, "Usage: %s -U -M mapstring -G mapstring\n", pname);
    for (size_t i = 0; i < line_count; i++) {
        fputs(help_lines[i], stderr);
    }
    exit(EXIT_FAILURE);
}

/* Size in bytes of the stack given to the cloned child (1 MiB). */
#define STACK_SIZE (1024 * 1024)

/* main passes child_stack + STACK_SIZE, i.e. the end of this array, to
   clone() as the child's stack pointer. */
static char child_stack[STACK_SIZE];    /* Space for child's stack */

/* Entry point: parse the options, clone a child (optionally in a new user
   namespace) and print the child's capabilities as seen from the parent.

   Options:
       -U            clone the child with CLONE_NEWUSER
       -M mapstring  UID mapping handed to the child
       -G mapstring  GID mapping handed to the child

   Every mapping consists of a list of tuples (separated by newlines) of the form:
       ID_inside-ns    ID-outside-ns   length
   Requiring the user to supply a string that contains newlines is
   of course inconvenient for command-line use. Thus, we permit the
   use of commas to delimit records in this string, and replace them
   with newlines before writing the string to the file. */
int main(int argc, char *argv[]){
    int flags = 0;
    char *gid_map = NULL, *uid_map = NULL;
    int opt;
    while ((opt = getopt(argc, argv, "UM:G:")) != -1) {
        switch (opt){
            /* -U takes no argument; it must not fall through into -M,
               which would overwrite uid_map with -U's optarg. */
            case 'U': flags |= CLONE_NEWUSER; break;
            case 'M': uid_map = optarg; break;
            case 'G': gid_map = optarg; break;
            default: usage(argv[0]);
        }
    }
    if ((uid_map != NULL || gid_map != NULL) && !(flags & CLONE_NEWUSER)){
        fprintf(stderr,"what about give me the user namespace option? what's in your mind today?\n");
        usage(argv[0]);
    }

    /* args[0] = uid map, args[1] = gid map; the child reads them as char **. */
    char *args[2];
    get_mapstr(uid_map); args[0] = uid_map;
    get_mapstr(gid_map); args[1] = gid_map;

    pid_t child_pid = clone(childFunc, child_stack + STACK_SIZE, flags | SIGCHLD,
                            (flags & CLONE_NEWUSER) ? args : NULL);
    if (child_pid == -1) errExit("clone");

    /* Crude synchronisation: give the child time to run before inspecting it. */
    sleep(1);

    cap_t child_caps = cap_get_pid(child_pid);
    if (child_caps == NULL) {
        fprintf(stderr, "cap_get_pid: %s\n", strerror(errno));
    } else {
        char *caps_text = cap_to_text(child_caps, NULL);
        if (caps_text == NULL) {
            fprintf(stderr, "cap_to_text: %s\n", strerror(errno));
        } else {
            fprintf(stderr, "child process pid capabilities from parent: %s\n", caps_text);
            cap_free(caps_text);
        }
        cap_free(child_caps);
    }
    fprintf(stderr, "euid %ld, egid %ld\n", (long)geteuid(), (long)getegid());
    exit(0);
}

我验证了:新命名空间中的子进程只能把父进程在外部命名空间中的有效用户 ID 映射到新命名空间中的任意 UID(包括 root);如果尝试从子进程映射其他外部用户,则会报错。这符合预期。

$ ./testcap3 -U -M"1000 39 1"
cloned child pid 7659
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 65534, egid 65534
setting uid map 1000 39 1
write /proc/7659/uid_map: Operation not permitted
child process pid capabilities from parent: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 1000, egid 1000
$ ./testcap3 -U -M"0 1000 1"
cloned child pid 7665
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 65534, egid 65534
setting uid map 0 1000 1
child process capabilities = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 0, egid 65534
child process pid capabilities from parent: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,cap_wake_alarm,cap_block_suspend,cap_audit_read+ep
euid 1000, egid 1000

我不明白的是:为什么从父进程打印子进程的能力时,显示为全部启用?我原本以为在外部命名空间中看不到任何特权,是我理解错了吗?显然,二进制文件 testcap3 没有特权(文件既没有设置 setuid/setgid 位,也没有设置文件能力,并且有效用户不是管理员)。能力是如何存储的?相关数据结构与命名空间之间是什么关系?

标签: posix, capability, linux-namespaces

解决方案


我稍微修改了测试代码,尝试从新命名空间中的克隆子进程发出 kill,并如预期得到了权限错误。
借此机会,我深入阅读了内核代码,分析 kill 的权限是如何被授予或拒绝的。
内核会比较目标进程的命名空间与当前线程的命名空间;如果两者相同,则检查当前线程是否启用了执行 kill 所需的能力标志。
否则(命名空间不同),内核检查当前线程是否是创建目标进程所在命名空间的那个进程的祖先;如果是,则允许继续交由其他 Linux 安全模块(如果有)评估。
相反,如果发起 kill 的线程是目标进程的后代,并且与目标进程不在同一个命名空间中,则 kill 请求会被拒绝。

glibc 为 signal.h 中声明的用户空间函数 kill 定义了一个弱符号,因此我认为实际被调用的代码位于内核中。涉及的系统调用和内核函数如下:

kill 系统调用

group_send_sig_info

check_kill_permission

kill_ok_by_cred

capable(挂接到 LSM 的 capability 模块)


推荐阅读