回到mm_init()函数,继续走,下一个函数kmem_cache_init(),也是重点函数,用于初始化内核slab分配体系。这个函数来自文件mm/slab.c
/*
 * Bootstrap the slab allocator (excerpt from mm/slab.c; elided ranges
 * are marked with "……").  Called from mm_init() before kmalloc() works:
 * it hand-builds cache_cache (the cache of cache descriptors) and the
 * generic kmalloc size caches, then replaces the static bootstrap
 * per-cpu arrays and per-node lists with kmalloc'ed ones.
 */
1375void __init kmem_cache_init(void)
1376{
1377 size_t left_over;
1378 struct cache_sizes *sizes;
1379 struct cache_names *names;
1380 int i;
1381 int order;
1382 int node;
1383
 /* A single possible memory node means no alien (remote-node) caches. */
1384 if (num_possible_nodes() == 1)
1385 use_alien_caches = 0;
1386
 /* Reset every static bootstrap list3; clear cache_cache's node slots. */
1387 for (i = 0; i < NUM_INIT_LISTS; i++) {
1388 kmem_list3_init(&initkmem_list3[i]);
1389 if (i < MAX_NUMNODES)
1390 cache_cache.nodelists[i] = NULL;
1391 }
1392 set_up_list3s(&cache_cache, CACHE_CACHE);
1393
……
 /* With more than 32MB of RAM, permit higher-order slab pages. */
1398 if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
1399 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
1400
……
1420
1421 node = numa_node_id();
1422
1423 /* 1) create the cache_cache */
1424 INIT_LIST_HEAD(&cache_chain);
1425 list_add(&cache_cache.next, &cache_chain);
1426 cache_cache.colour_off = cache_line_size();
 /* Bootstrap per-cpu array and this node's list3 come from static storage. */
1427 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1428 cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
1429
……
 /* Object size: struct kmem_cache trimmed to nr_node_ids nodelist slots. */
1434 cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
1435 nr_node_ids * sizeof(struct kmem_list3 *);
1436#if DEBUG
1437 cache_cache.obj_size = cache_cache.buffer_size;
1438#endif
1439 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1440 cache_line_size());
1441 cache_cache.reciprocal_buffer_size =
1442 reciprocal_value(cache_cache.buffer_size);
1443
 /* Find the smallest page order that holds at least one object. */
1444 for (order = 0; order < MAX_ORDER; order++) {
1445 cache_estimate(order, cache_cache.buffer_size,
1446 cache_line_size(), 0, &left_over, &cache_cache.num);
1447 if (cache_cache.num)
1448 break;
1449 }
1450 BUG_ON(!cache_cache.num);
1451 cache_cache.gfporder = order;
 /* Leftover space per slab funds cache colouring. */
1452 cache_cache.colour = left_over / cache_cache.colour_off;
1453 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1454 sizeof(struct slab), cache_line_size());
1455
1456 /* 2+3) create the kmalloc caches */
1457 sizes = malloc_sizes;
1458 names = cache_names;
1459
……
 /* Create the array_cache-sized and kmem_list3-sized caches first. */
1466 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1467 sizes[INDEX_AC].cs_size,
1468 ARCH_KMALLOC_MINALIGN,
1469 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1470 NULL);
1471
1472 if (INDEX_AC != INDEX_L3) {
1473 sizes[INDEX_L3].cs_cachep =
1474 kmem_cache_create(names[INDEX_L3].name,
1475 sizes[INDEX_L3].cs_size,
1476 ARCH_KMALLOC_MINALIGN,
1477 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1478 NULL);
1479 }
1480
 /* NOTE(review): presumably lifts early-boot restrictions in
  * cache_grow() — confirm against the full mm/slab.c. */
1481 slab_early_init = 0;
1482
 /* Create the remaining generic caches, up to the ULONG_MAX sentinel. */
1483 while (sizes->cs_size != ULONG_MAX) {
……
1491 if (!sizes->cs_cachep) {
1492 sizes->cs_cachep = kmem_cache_create(names->name,
1493 sizes->cs_size,
1494 ARCH_KMALLOC_MINALIGN,
1495 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1496 NULL);
1497 }
1498#ifdef CONFIG_ZONE_DMA
1499 sizes->cs_dmacachep = kmem_cache_create(
1500 names->name_dma,
1501 sizes->cs_size,
1502 ARCH_KMALLOC_MINALIGN,
1503 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1504 SLAB_PANIC,
1505 NULL);
1506#endif
1507 sizes++;
1508 names++;
1509 }
1510 /* 4) Replace the bootstrap head arrays */
1511 {
1512 struct array_cache *ptr;
1513
 /* kmalloc works now: swap the static initarray_cache for heap storage. */
1514 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1515
1516 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
1517 memcpy(ptr, cpu_cache_get(&cache_cache),
1518 sizeof(struct arraycache_init));
1519 /*
1520 * Do not assume that spinlocks can be initialized via memcpy:
1521 */
1522 spin_lock_init(&ptr->lock);
1523
1524 cache_cache.array[smp_processor_id()] = ptr;
1525
 /* Same replacement for the AC cache's bootstrap per-cpu array. */
1526 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1527
1528 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
1529 != &initarray_generic.cache);
1530 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
1531 sizeof(struct arraycache_init));
1532 /*
1533 * Do not assume that spinlocks can be initialized via memcpy:
1534 */
1535 spin_lock_init(&ptr->lock);
1536
1537 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
1538 ptr;
1539 }
1540 /* 5) Replace the bootstrap kmem_list3's */
1541 {
1542 int nid;
1543
 /* Migrate each online node's static list3s to kmalloc'ed copies. */
1544 for_each_online_node(nid) {
1545 init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
1546
1547 init_list(malloc_sizes[INDEX_AC].cs_cachep,
1548 &initkmem_list3[SIZE_AC + nid], nid);
1549
1550 if (INDEX_AC != INDEX_L3) {
1551 init_list(malloc_sizes[INDEX_L3].cs_cachep,
1552 &initkmem_list3[SIZE_L3 + nid], nid);
1553 }
1554 }
1555 }
1556
 /* Record slab bootstrap progress for the rest of the allocator. */
1557 g_cpucache_up = EARLY;
1558}
去掉了若干行代码,别担心,全是注释。1387行,宏NUM_INIT_LISTS的值为
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
即3 * MAX_NUMNODES;在非NUMA配置下MAX_NUMNODES为1,所以这里的值是3。循环NUM_INIT_LISTS次,调用kmem_list3_init函数初始化全局变量initkmem_list3[]数组的每个元素。该数组的定义也在同一个文件:
/*
 * Per-node slab bookkeeping: the three slab lists (full/partial/free)
 * plus per-node shared/alien array caches and reaping state.
 */
struct kmem_list3 {
struct list_head slabs_partial; /* partial list first, better asm code */
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long free_objects;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
spinlock_t list_lock;
struct array_cache *shared; /* shared per node */
struct array_cache **alien; /* on other nodes */
unsigned long next_reap; /* updated without locking */
int free_touched; /* updated without locking */
};
/* Static bootstrap list3s used until kmalloc works; __initdata, so
 * this memory is discarded after boot. */
struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
而初始化它每个元素的函数也很简单,位于同一个文件中:
/*
 * Reset one bootstrap kmem_list3: empty the three slab lists, drop any
 * shared/alien cache pointers, and zero the counters so the node starts
 * with no slabs.
 */
static void kmem_list3_init(struct kmem_list3 *parent)
{
INIT_LIST_HEAD(&parent->slabs_full);
INIT_LIST_HEAD(&parent->slabs_partial);
INIT_LIST_HEAD(&parent->slabs_free);
parent->shared = NULL;
parent->alien = NULL;
parent->colour_next = 0;
spin_lock_init(&parent->list_lock);
parent->free_objects = 0;
parent->free_touched = 0;
}
随后1392行,调用set_up_list3s函数为全局变量cache_cache初始化它的lists字段。全局变量cache_cache太重要了,它是slab体系的核心数据结构,其定义如下:
/*
 * The statically-defined "cache of caches": the kmem_cache descriptor
 * that all other kmem_cache descriptors are allocated from.
 * buffer_size is seeded with sizeof(struct kmem_cache) and recomputed
 * in kmem_cache_init(); the remaining fields stay zero until bootstrap
 * fills them in.
 */
static struct kmem_cache cache_cache = {
.batchcount = 1,
.limit = BOOT_CPUCACHE_ENTRIES,
.shared = 1,
.buffer_size = sizeof(struct kmem_cache),
.name = "kmem_cache",
};
/*
 * Descriptor for one slab cache.  Fields are grouped by access pattern:
 * hot per-cpu data first, cold creation/removal data last.  The
 * nodelists[] tail must stay last — kmem_cache_init() sizes the object
 * as offsetof(..., nodelists) plus only nr_node_ids pointers.
 */
struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
struct array_cache *array[NR_CPUS];
/* 2) Cache tunables. Protected by cache_chain_mutex */
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
unsigned int buffer_size;
u32 reciprocal_buffer_size;
/* 3) touched by every alloc & free from the backend */
unsigned int flags; /* constant flags */
unsigned int num; /* # of objs per slab */
/* 4) cache_grow/shrink */
/* order of pgs per slab (2^n) */
unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */
gfp_t gfpflags;
size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
struct kmem_cache *slabp_cache;
unsigned int slab_size;
unsigned int dflags; /* dynamic flags */
/* constructor func */
void (*ctor)(void *obj);
/* 5) cache creation/removal */
const char *name;
struct list_head next;
/* 6) statistics */
#ifdef CONFIG_DEBUG_SLAB
……slab调试相关数据结构,省略。
#endif /* CONFIG_DEBUG_SLAB */
……一些注释……
struct kmem_list3 *nodelists[MAX_NUMNODES];
/*
 * Do not add fields after nodelists[]
 */
};
kmem_cache数据结构的注释写得很详细,大家可以去仔细看看。set_up_list3s函数在本文件中:
#define CACHE_CACHE 0
/*
 * Point each online node's nodelists[] slot of @cachep at the matching
 * static bootstrap list3 (initkmem_list3[index + node]).  next_reap is
 * offset by the cache's address modulo REAPTIMEOUT_LIST3 so different
 * caches do not all reap at the same jiffy.
 */
static void __init set_up_list3s(struct kmem_cache *cachep, int index)
{
int node;
for_each_online_node(node) {
cachep->nodelists[node] = &initkmem_list3[index + node];
cachep->nodelists[node]->next_reap = jiffies +
REAPTIMEOUT_LIST3 +
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
}
}
很简单,就是针对每个在线的NODE,把全局cache_cache变量的nodelists[node]设置成initkmem_list3[CACHE_CACHE + node]的地址。在UMA体系下只有节点0,也就是nodelists[0]指向刚才初始化好的initkmem_list3[0];在NUMA体系下,全局initkmem_list3[]数组按"每类cache、每个节点一项"的方式划分,由cache_cache的nodelists[]数组的各元素分别指向对应项。
继续走,1424行,初始化一个内核全局链表cache_chain,这个东西就是个很简单的list_head结构,定义在同一个文件中:
static struct list_head cache_chain;
随后调用list_add将它与cache_cache链接起来,接下来1426~1454行初始化这个cache_cache的其他字段
来到1457行,又一个重要的全局变量malloc_sizes。这个变量关系着通用slab分配器的初始化,有关专用/通用slab分配器的概念是Linux kernel内存管理的核心内容,对这个概念不熟悉的同学请先复习一下Linux内核内存管理的相关内容。来看这个变量的定义,在同一文件的570行:
/*
 * The generic kmalloc cache table: one entry per CACHE(x) line in
 * linux/kmalloc_sizes.h, terminated by a ULONG_MAX sentinel entry.
 * The cs_cachep/cs_dmacachep pointers are filled in by kmem_cache_init().
 */
570 struct cache_sizes malloc_sizes[] = {
571 #define CACHE(x) { .cs_size = (x) },
572 #include <linux/kmalloc_sizes.h>
573 CACHE(ULONG_MAX)
574 #undef CACHE
575};
cache_sizes是个如下结构:
/*
 * One generic kmalloc size class: the object size plus its normal cache
 * and, when CONFIG_ZONE_DMA is set, its DMA-zone cache.
 */
struct cache_sizes {
size_t cs_size;
struct kmem_cache *cs_cachep;
#ifdef CONFIG_ZONE_DMA
struct kmem_cache *cs_dmacachep;
#endif
};
那么malloc_sizes[]数组的全部元素来自linux/kmalloc_sizes.h文件,下面就来看看这个文件的全部内容:
/*
 * linux/kmalloc_sizes.h: the CACHE(x) size table expanded by both
 * malloc_sizes[] and cache_names[].  Which entries exist depends on
 * PAGE_SIZE, L1_CACHE_BYTES and KMALLOC_MAX_SIZE.
 */
#if (PAGE_SIZE == 4096)
CACHE(32)
#endif
CACHE(64)
#if L1_CACHE_BYTES < 64
CACHE(96)
#endif
CACHE(128)
#if L1_CACHE_BYTES < 128
CACHE(192)
#endif
CACHE(256)
CACHE(512)
CACHE(1024)
CACHE(2048)
CACHE(4096)
CACHE(8192)
CACHE(16384)
CACHE(32768)
CACHE(65536)
CACHE(131072)
#if KMALLOC_MAX_SIZE >= 262144
CACHE(262144)
#endif
#if KMALLOC_MAX_SIZE >= 524288
CACHE(524288)
#endif
#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
#endif
#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
#endif
#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
#endif
#if KMALLOC_MAX_SIZE >= 8388608
CACHE(8388608)
#endif
#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
#endif
#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
#endif
全局变量malloc_sizes数组的初始化就在编译vmlinux的时候定义成上述形式,其首地址在函数中被赋给了内部变量sizes。1458行,cache_names是一个跟malloc_sizes差不多的全局变量数组:
/*
 * Human-readable names for the generic caches ("size-32",
 * "size-32(DMA)", ...), generated from the same kmalloc_sizes.h list
 * as malloc_sizes[] so the two arrays stay index-aligned.  __initdata:
 * discarded after boot.
 */
static struct cache_names __initdata cache_names[] = {
#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
#include <linux/kmalloc_sizes.h>
{NULL,}
#undef CACHE
};
其首地址同样被赋给了内部变量names。那么,1466-1509行,调用kmem_cache_create函数为每一个通用slab分配器创建cache。这个函数首先根据参数确定处理新高速缓存的最佳方法(例如,是在slab的内部还是外部存放slab描述符),然后它从cache_cache普通高速缓存中为新的高速缓存分配一个高速缓存描述符(注意:下面引用的kmem_cache_t和SLAB_KERNEL是较早内核版本的写法,对应本文代码中的struct kmem_cache和GFP_KERNEL):
(kmem_cache_t *) kmem_cache_alloc(&cache_cache, SLAB_KERNEL);
并把这个描述符插入到高速缓存描述符的cache_chain链表中(当获得了用于保护链表避免被同时访问的cache_chain_sem 信号量后,插入操作完成)。具体的细节我就不多说了,有兴趣的同学可以参照博文“slab分配器”
http://blog.csdn.net/yunsongice/archive/2010/01/30/5272715.aspx
以及源代码进行分析。
上述代码执行完毕后,slab通用分配器kmalloc函数就可以使用了。所以我们看到1514行调用kmalloc分配了一个arraycache_init结构,随后1516~1538行代码用它替换掉初始化早期挂在每CPU的array指针上的静态initarray_cache/initarray_generic:旧内容先被memcpy到新分配的结构里,其中的自旋锁再用spin_lock_init重新初始化(自旋锁不能靠memcpy复制),最后把每CPU指针指向新结构。
1541~1555行调用init_list函数,为cache_cache和malloc_sizes[INDEX_AC].cs_cachep(以及INDEX_L3对应的cache)用kmalloc_node重新分配kmem_list3结构,把引导期静态initkmem_list3[]中的内容复制过去并替换nodelists[]指针——静态引导数据从此不再使用:
/*
 * Migrate one bootstrap kmem_list3 to a kmalloc'ed replacement on the
 * right node: copy the static contents, re-init the spinlock (a
 * spinlock cannot safely be copied with memcpy), then install the new
 * structure in cachep->nodelists[nodeid].
 * NOTE(review): MAKE_ALL_LISTS presumably repairs the copied list_head
 * pointers so they reference the new storage — confirm in mm/slab.c.
 */
static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
int nodeid)
{
struct kmem_list3 *ptr;
ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
BUG_ON(!ptr);
memcpy(ptr, list, sizeof(struct kmem_list3));
/*
 * Do not assume that spinlocks can be initialized via memcpy:
 */
spin_lock_init(&ptr->list_lock);
MAKE_ALL_LISTS(cachep, ptr, nodeid);
cachep->nodelists[nodeid] = ptr;
}
kmem_cache_init函数的最后一行,把全局变量g_cpucache_up设置成EARLY,slab分配器就初始化完了。