LinuxKernel-list.h 源码不完全分析

有一段时间没认真写博客了,没能一直坚持着,实在让 Y7n05h 感到惭愧,所以今天写出本文也算是补救一下吧.

info
License
本文引用了部分来自 Linux Kernel 的源码,源码取自 LinuxKernel v2.6.34 基于 GPLv2

list.h 源码分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
/*
* Simple doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
* sometimes we already know the next/prev entries and we can
* generate better code by using them directly rather than
* using the generic single-entry routines.
*/

struct list_head {
struct list_head *next, *prev;
};

#define LIST_HEAD_INIT(name) { &(name), &(name) }

#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)

static inline void INIT_LIST_HEAD(struct list_head *list)
{
list->next = list;
list->prev = list;
}

这里是链表的核心结构,实现双向循环链表的初始化.

1
2
3
4
5
6
7
8
9
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next)
{
next->prev = new;
new->next = next;
new->prev = prev;
prev->next = new;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**
* list_add - add a new entry
* @new: new entry to be added
* @head: list head to add it after
*
* Insert a new entry after the specified head.
* This is good for implementing stacks.
*/
static inline void list_add(struct list_head *new, struct list_head *head)
{
__list_add(new, head, head->next);
}


/**
* list_add_tail - add a new entry
* @new: new entry to be added
* @head: list head to add it before
*
* Insert a new entry before the specified head.
* This is useful for implementing queues.
*/
static inline void list_add_tail(struct list_head *new, struct list_head *head)
{
__list_add(new, head->prev, head);
}

关于插入也没什么需要过度解释的,唯一想说说的是 inline 的使用消除了函数调用的开销,当然代价是内核大小的增大,但我想这点代价是值得的.
当然,这里对 __list_add() 的复用和对两种不同的插入方式的抽象是十分精彩的.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
/*
* Delete a list entry by making the prev/next entries
* point to each other.
*
* This is only for internal list manipulation where we know
* the prev/next entries already!
*/
static inline void __list_del(struct list_head * prev, struct list_head * next)
{
next->prev = prev;
prev->next = next;
}

/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
* Note: list_empty() on entry does not return true after this, the entry is
* in an undefined state.
*/
#ifndef CONFIG_DEBUG_LIST
static inline void list_del(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
entry->next = LIST_POISON1;
entry->prev = LIST_POISON2;
}
#else
extern void list_del(struct list_head *entry);
#endif

关于这里,也没什么能产生太大疑惑的地方,唯一要好奇的可能是为什么要把被删除的链表节点的指针置为 LIST_POISON1LIST_POISON2
在用户态编程的时候,开发者们常把无效的指针置为 NULL 防止出现 Use After Free(UAF) 等问题的出现,一旦访问置为 NULL 的指针就能通过 Segment fault 得知发生了错误.但别忘了,Segment fault的检查是由内核完成的,在内核态编程时,自然是无法使用的.因此这里使用这两个特殊的地址触发分页保护告知开发者出现内存错误.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
* Architectures might want to move the poison pointer offset
* into some well-recognized area such as 0xdead000000000000,
* that is also not mappable by user-space exploits:
*/
#ifdef CONFIG_ILLEGAL_POINTER_VALUE
# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
#else
# define POISON_POINTER_DELTA 0
#endif

/*
* These are non-NULL pointers that will result in page faults
* under normal circumstances, used to verify that nobody uses
* non-initialized list entries.
*/
#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA)

剩下的部分虽然也有很多内容,但都比较简单,相对来说也是易于理解的,Y7n05h 在这里就不赘述了.

这个宏函数还是很有趣的,能看到里面有很多 GNU 对 C 语言的扩展语法.直接从定义中看明白这个宏的用法是略有困难的,参考这个宏的用例将有助于理解.

1
2
3
4
5
6
7
8
/**
* list_entry - get the struct for this entry
* @ptr: the &struct list_head pointer.
* @type: the type of the struct this is embedded in.
* @member: the name of the list_struct within the struct.
*/
#define list_entry(ptr, type, member) \
container_of(ptr, type, member)
1
2
3
4
5
6
7
8
9
10
/**
* container_of - cast a member of a structure out to the containing structure
* @ptr: the pointer to the member.
* @type: the type of the container struct this is embedded in.
* @member: the name of the member within the struct.
*
*/
#define container_of(ptr, type, member) ({ \
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
(type *)( (char *)__mptr - offsetof(type,member) );})

用例:

1
2
3
4
5
static inline struct nfs_page *
nfs_list_entry(struct list_head *head)
{
return list_entry(head, struct nfs_page, wb_list);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
struct nfs_page {
struct list_head wb_list; /* Defines state of page: */
struct page *wb_page; /* page to read in/write out */
struct nfs_open_context *wb_context; /* File state context info */
atomic_t wb_complete; /* i/os we're waiting for */
pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */
unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */
wb_pgbase, /* Start of page data */
wb_bytes; /* Length of request */
struct kref wb_kref; /* reference count */
unsigned long wb_flags;
struct nfs_writeverf wb_verf; /* Commit cookie */
};

可以清晰的看到在 struct nfs_page 中,链表结点是 struct list_head wb_list

head 是指向 container_of 到底做了什么呢?那就是根据结构体中的成员的地址,计算出结构体的地址.首先,抛开代码考虑这件事情,在给定结构体在确定体系结构上使用确定对齐方式,那么结构体成员相对结构体的偏移量就是一个编译期能确定的常量.那么若有了结构体成员的地址,那么减去相应的偏移量即可得到结构体的地址.这一切在理论上都是可行的,剩下的事只是如何用代码实现.

其次,分析 container_of 的代码实现:

1
const typeof( ((type *)0)->member ) *__mptr = (ptr);

这里使用 typeof 进行类型推断,实现范型编程,声明获得与 member 相同的类型,并添加 * 获得 member 的指针类型.通过这一行,获得了指向结构体成员的指针.同时利用 (type *)( (char *)__mptr - offsetof(type,member) ) 根据 offsetof 关键字获得 membertype 中的偏移量,并使用指针运行将其从结构体成员的地址中减去.
最后则是使用 GNU 扩展的语句表达式语法,避免了需要将宏函数用 do{...}while(0) 包裹的麻烦事.
这些内容足够简单,但宏的运用与衔接十分精妙.

最后在谈谈

1
2
3
4
5
6
7
8
/**
* list_for_each - iterate over a list
* @pos: the &struct list_head to use as a loop cursor.
* @head: the head for your list.
*/
#define list_for_each(pos, head) \
for (pos = (head)->next; prefetch(pos->next), pos != (head); \
pos = pos->next)
1
2
3
#ifndef ARCH_HAS_PREFETCH
#define prefetch(x) __builtin_prefetch(x)
#endif

这里的遍历没什么好提及的,唯一想说说的地方只是 prefetchprefetch 也就是 __builtin_prefetch 看名字不难发现这是 GCC 的内置函数.查一下就能得知这是用来预读数据减少延迟的函数.大概就是防止后面用这个数据的时候出现缓存不命中吧.

好了,本文到此也就结束了.list.h 的别的部分 Y7n05h 认为也没有什么难以理解的内容了.

参考资料

1. LinuxKernel.