Initializing the slab allocator


5.8.2 Initializing the slab allocator

Back in mm_init(), the next function on our path is kmem_cache_init(), another key routine: it sets up the kernel's slab allocation framework. It lives in mm/slab.c:

     

    1375 void __init kmem_cache_init(void)

    1376 {

    1377        size_t left_over;

    1378        struct cache_sizes *sizes;

    1379        struct cache_names *names;

    1380        int i;

    1381        int order;

    1382        int node;

    1383

    1384        if (num_possible_nodes() == 1)

    1385                use_alien_caches = 0;

    1386

    1387        for (i = 0; i < NUM_INIT_LISTS; i++) {

    1388                kmem_list3_init(&initkmem_list3[i]);

    1389                if (i < MAX_NUMNODES)

    1390                        cache_cache.nodelists[i] = NULL;

    1391        }

    1392        set_up_list3s(&cache_cache, CACHE_CACHE);

    1393

    ……

    1398        if (totalram_pages > (32 << 20) >> PAGE_SHIFT)

    1399                slab_break_gfp_order = BREAK_GFP_ORDER_HI;

    1400

    ……

    1420

    1421        node = numa_node_id();

    1422

    1423        /* 1) create the cache_cache */

    1424        INIT_LIST_HEAD(&cache_chain);

    1425        list_add(&cache_cache.next, &cache_chain);

    1426        cache_cache.colour_off = cache_line_size();

    1427        cache_cache.array[smp_processor_id()] = &initarray_cache.cache;

    1428        cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];

    1429

    ……

    1434        cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +

    1435                                 nr_node_ids * sizeof(struct kmem_list3 *);

    1436 #if DEBUG

    1437        cache_cache.obj_size = cache_cache.buffer_size;

    1438 #endif

    1439        cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,

    1440                                        cache_line_size());

    1441        cache_cache.reciprocal_buffer_size =

    1442                reciprocal_value(cache_cache.buffer_size);

    1443

    1444        for (order = 0; order < MAX_ORDER; order++) {

    1445                cache_estimate(order, cache_cache.buffer_size,

    1446                        cache_line_size(), 0, &left_over, &cache_cache.num);

    1447                if (cache_cache.num)

    1448                        break;

    1449        }

    1450        BUG_ON(!cache_cache.num);

    1451        cache_cache.gfporder = order;

    1452        cache_cache.colour = left_over / cache_cache.colour_off;

    1453        cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +

    1454                                      sizeof(struct slab), cache_line_size());

    1455

    1456        /* 2+3) create the kmalloc caches */

    1457        sizes = malloc_sizes;

    1458        names = cache_names;

    1459

    ……

    1466        sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,

    1467                                        sizes[INDEX_AC].cs_size,

    1468                                        ARCH_KMALLOC_MINALIGN,

    1469                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,

    1470                                        NULL);

    1471

    1472        if (INDEX_AC != INDEX_L3) {

    1473                sizes[INDEX_L3].cs_cachep =

    1474                        kmem_cache_create(names[INDEX_L3].name,

    1475                                sizes[INDEX_L3].cs_size,

    1476                                ARCH_KMALLOC_MINALIGN,

    1477                                ARCH_KMALLOC_FLAGS|SLAB_PANIC,

    1478                                NULL);

    1479        }

    1480

    1481        slab_early_init = 0;

    1482

    1483        while (sizes->cs_size != ULONG_MAX) {

    ……

    1491                if (!sizes->cs_cachep) {

    1492                        sizes->cs_cachep = kmem_cache_create(names->name,

    1493                                        sizes->cs_size,

    1494                                        ARCH_KMALLOC_MINALIGN,

    1495                                        ARCH_KMALLOC_FLAGS|SLAB_PANIC,

    1496                                        NULL);

    1497                }

    1498 #ifdef CONFIG_ZONE_DMA

    1499                sizes->cs_dmacachep = kmem_cache_create(

    1500                                        names->name_dma,

    1501                                        sizes->cs_size,

    1502                                        ARCH_KMALLOC_MINALIGN,

    1503                                        ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|

    1504                                                SLAB_PANIC,

    1505                                        NULL);

    1506 #endif

    1507                sizes++;

    1508                names++;

    1509        }

    1510        /* 4) Replace the bootstrap head arrays */

    1511        {

    1512                struct array_cache *ptr;

    1513

    1514                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

    1515

    1516                BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);

    1517                memcpy(ptr, cpu_cache_get(&cache_cache),

    1518                       sizeof(struct arraycache_init));

    1519                /*

    1520                 * Do not assume that spinlocks can be initialized via memcpy:

    1521                 */

    1522                spin_lock_init(&ptr->lock);

    1523

    1524                cache_cache.array[smp_processor_id()] = ptr;

    1525

    1526                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);

    1527

    1528                BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)

    1529                       != &initarray_generic.cache);

    1530                memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),

    1531                       sizeof(struct arraycache_init));

    1532                /*

    1533                 * Do not assume that spinlocks can be initialized via memcpy:

    1534                 */

    1535                spin_lock_init(&ptr->lock);

    1536

    1537                malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =

    1538                    ptr;

    1539        }

    1540        /* 5) Replace the bootstrap kmem_list3's */

    1541        {

    1542                int nid;

    1543

    1544                for_each_online_node(nid) {

    1545                        init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);

    1546

    1547                        init_list(malloc_sizes[INDEX_AC].cs_cachep,

    1548                                  &initkmem_list3[SIZE_AC + nid], nid);

    1549

    1550                        if (INDEX_AC != INDEX_L3) {

    1551                                init_list(malloc_sizes[INDEX_L3].cs_cachep,

    1552                                          &initkmem_list3[SIZE_L3 + nid], nid);

    1553                        }

    1554                }

    1555        }

    1556

    1557        g_cpucache_up = EARLY;

    1558 }

     

A number of lines have been dropped from the listing; don't worry, they are all comments. At line 1387, the macro NUM_INIT_LISTS is defined as

#define NUM_INIT_LISTS (3 * MAX_NUMNODES)

that is, three entries per possible NUMA node (on a non-NUMA build MAX_NUMNODES is 1, so the loop runs exactly three times). Each iteration calls kmem_list3_init() to initialize one element of the global initkmem_list3[] array, which is defined in the same file:

    struct kmem_list3 {

           struct list_head slabs_partial; /* partial list first, better asm code */

           struct list_head slabs_full;

           struct list_head slabs_free;

           unsigned long free_objects;

           unsigned int free_limit;

           unsigned int colour_next;      /* Per-node cache coloring */

           spinlock_t list_lock;

           struct array_cache *shared;    /* shared per node */

           struct array_cache **alien;    /* on other nodes */

           unsigned long next_reap;       /* updated without locking */

           int free_touched;           /* updated without locking */

    };

    struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];

     

The function that initializes each element is equally simple and lives in the same file:

     

    static void kmem_list3_init(struct kmem_list3 *parent)

    {

           INIT_LIST_HEAD(&parent->slabs_full);

           INIT_LIST_HEAD(&parent->slabs_partial);

           INIT_LIST_HEAD(&parent->slabs_free);

           parent->shared = NULL;

           parent->alien = NULL;

           parent->colour_next = 0;

           spin_lock_init(&parent->list_lock);

           parent->free_objects = 0;

           parent->free_touched = 0;

    }
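
Why three groups of lists? Three caches must exist before kmalloc() itself works: cache_cache, the kmalloc cache that holds array_cache objects, and the kmalloc cache that holds kmem_list3 objects. Each of them gets one bootstrap kmem_list3 per node in initkmem_list3[], addressed through a base index. The definitions below are quoted from memory of this kernel generation and should be read as an illustration of the indexing scheme rather than a verbatim copy:

#define CACHE_CACHE 0                   /* bootstrap lists for cache_cache itself */
#define SIZE_AC     MAX_NUMNODES        /* bootstrap lists for the array_cache-sized kmalloc cache */
#define SIZE_L3     (2 * MAX_NUMNODES)  /* bootstrap lists for the kmem_list3-sized kmalloc cache */

So initkmem_list3[CACHE_CACHE + node], initkmem_list3[SIZE_AC + node] and initkmem_list3[SIZE_L3 + node] are the per-node bootstrap lists of those three caches, which is exactly why NUM_INIT_LISTS is 3 * MAX_NUMNODES; SIZE_AC and SIZE_L3 reappear in steps 4 and 5 at the end of the function.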

     

Next, at line 1392, set_up_list3s() is called to initialize the nodelists field of the global variable cache_cache. cache_cache matters a great deal: it is the core data structure of the slab system, and it is defined as follows:

     

    static struct kmem_cache cache_cache = {

           .batchcount = 1,

           .limit = BOOT_CPUCACHE_ENTRIES,

           .shared = 1,

           .buffer_size = sizeof(struct kmem_cache),

           .name = "kmem_cache",

    };

     

    struct kmem_cache {

    /* 1) per-cpu data, touched during every alloc/free */

           struct array_cache *array[NR_CPUS];

    /* 2) Cache tunables. Protected by cache_chain_mutex */

           unsigned int batchcount;

           unsigned int limit;

           unsigned int shared;

     

           unsigned int buffer_size;

           u32 reciprocal_buffer_size;

    /* 3) touched by every alloc & free from the backend */

     

           unsigned int flags;         /* constant flags */

           unsigned int num;          /* # of objs per slab */

     

    /* 4) cache_grow/shrink */

           /* order of pgs per slab (2^n) */

           unsigned int gfporder;

     

           /* force GFP flags, e.g. GFP_DMA */

           gfp_t gfpflags;

     

           size_t colour;                /* cache colouring range */

           unsigned int colour_off; /* colour offset */

           struct kmem_cache *slabp_cache;

           unsigned int slab_size;

           unsigned int dflags;              /* dynamic flags */

     

           /* constructor func */

           void (*ctor)(void *obj);

     

    /* 5) cache creation/removal */

           const char *name;

           struct list_head next;

     

    /* 6) statistics */

    #ifdef CONFIG_DEBUG_SLAB

……slab-debugging fields, omitted.

    #endif /* CONFIG_DEBUG_SLAB */

……a few comments elided……

           struct kmem_list3 *nodelists[MAX_NUMNODES];

           /*

            * Do not add fields after nodelists[]

            */

    };

     

The comments on struct kmem_cache in the source are quite detailed and well worth reading. The set_up_list3s() function is in the same file:

     

    #define    CACHE_CACHE 0

    static void __init set_up_list3s(struct kmem_cache *cachep, int index)

    {

           int node;

     

           for_each_online_node(node) {

                  cachep->nodelists[node] = &initkmem_list3[index + node];

                  cachep->nodelists[node]->next_reap = jiffies +

                      REAPTIMEOUT_LIST3 +

                      ((unsigned long)cachep) % REAPTIMEOUT_LIST3;

           }

    }

     

     

This is straightforward: for node 0 it makes cache_cache.nodelists[0] point to the just-initialized initkmem_list3[0]. On a NUMA system every online node gets its own entry of that same bootstrap array, i.e. cache_cache.nodelists[node] points to initkmem_list3[CACHE_CACHE + node], and each node's next_reap is staggered slightly per cache so that periodic reaping does not hit every cache at the same time.

     

Moving on: line 1424 initializes the kernel-global list cache_chain, which is nothing more than a bare list_head, defined in the same file:

    static struct list_head cache_chain;

list_add() then hooks cache_cache onto that chain. Lines 1426-1454 fill in the remaining fields of cache_cache; in particular, the loop at lines 1444-1449 asks cache_estimate() for the smallest page order at which at least one object fits into a slab, line 1452 turns the leftover bytes into the colouring range, and lines 1453-1454 compute slab_size.
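
As a rough illustration of what cache_estimate() computes in the on-slab case, here is a simplified userspace sketch: the management data (slab descriptor plus one kmem_bufctl_t per object) and the objects themselves must share the 2^order pages, and whatever is left becomes colouring space. The types, names and page/cache-line sizes below are assumptions made for the example, not the kernel's exact code:

#include <stddef.h>
#include <stdio.h>

/* Assumptions for this sketch: 4 KiB pages, 64-byte cache lines, and a
 * stand-in for the kernel's struct slab and kmem_bufctl_t. */
#define PAGE_SIZE 4096UL

struct slab_header { unsigned long placeholder[7]; }; /* stand-in for struct slab */
typedef unsigned int kmem_bufctl_t;

static size_t align_up(size_t n, size_t align)
{
	return (n + align - 1) & ~(align - 1);
}

/* On-slab estimate: how many objects of buffer_size, plus the management
 * data (slab descriptor + one bufctl per object), fit into 2^order pages,
 * and how many bytes are left over for colouring. */
static void cache_estimate(int order, size_t buffer_size, size_t align,
			   size_t *left_over, unsigned int *num)
{
	size_t slab_bytes = PAGE_SIZE << order;
	unsigned int nr = 0;
	size_t mgmt;

	while (1) {
		mgmt = align_up(sizeof(struct slab_header) +
				(nr + 1) * sizeof(kmem_bufctl_t), align);
		if (mgmt + (nr + 1) * buffer_size > slab_bytes)
			break;
		nr++;
	}

	mgmt = align_up(sizeof(struct slab_header) + nr * sizeof(kmem_bufctl_t), align);
	*num = nr;
	*left_over = slab_bytes - mgmt - nr * buffer_size;
}

int main(void)
{
	size_t left_over;
	unsigned int num;

	/* Example: a 192-byte object on an order-0 slab. */
	cache_estimate(0, 192, 64, &left_over, &num);
	printf("order 0: %u objects per slab, %zu bytes left for colouring\n",
	       num, left_over);
	return 0;
}

The kernel's cache_estimate() does this with a closed-form calculation and also handles the off-slab case (management data stored outside the slab), but the arithmetic above captures what lines 1444-1454 rely on.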

     

Line 1457 brings us to another important global variable, malloc_sizes. It drives the initialization of the general-purpose slab caches, i.e. the ones behind kmalloc(); the distinction between dedicated and general-purpose slab caches is a core concept of Linux kernel memory management, and readers who are hazy on it should go back and review that material first. The variable is defined at line 570 of the same file:

     

    570 struct cache_sizes malloc_sizes[] = {

    571 #define CACHE(x) { .cs_size = (x) },

    572 #include <linux/kmalloc_sizes.h>

    573     CACHE(ULONG_MAX)

    574 #undef CACHE

    575};

     

struct cache_sizes looks like this:

    struct cache_sizes {

           size_t                  cs_size;

           struct kmem_cache *cs_cachep;

    #ifdef CONFIG_ZONE_DMA

           struct kmem_cache *cs_dmacachep;

    #endif

    };

     

So every element of malloc_sizes[] comes from linux/kmalloc_sizes.h; here is the complete content of that file:

    #if (PAGE_SIZE == 4096)

           CACHE(32)

    #endif

           CACHE(64)

    #if L1_CACHE_BYTES < 64

           CACHE(96)

    #endif

           CACHE(128)

    #if L1_CACHE_BYTES < 128

           CACHE(192)

    #endif

           CACHE(256)

           CACHE(512)

           CACHE(1024)

           CACHE(2048)

           CACHE(4096)

           CACHE(8192)

           CACHE(16384)

           CACHE(32768)

           CACHE(65536)

           CACHE(131072)

    #if KMALLOC_MAX_SIZE >= 262144

           CACHE(262144)

    #endif

    #if KMALLOC_MAX_SIZE >= 524288

           CACHE(524288)

    #endif

    #if KMALLOC_MAX_SIZE >= 1048576

           CACHE(1048576)

    #endif

    #if KMALLOC_MAX_SIZE >= 2097152

           CACHE(2097152)

    #endif

    #if KMALLOC_MAX_SIZE >= 4194304

           CACHE(4194304)

    #endif

    #if KMALLOC_MAX_SIZE >= 8388608

           CACHE(8388608)

    #endif

    #if KMALLOC_MAX_SIZE >= 16777216

           CACHE(16777216)

    #endif

    #if KMALLOC_MAX_SIZE >= 33554432

           CACHE(33554432)

    #endif
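
Assuming PAGE_SIZE == 4096 and 64-byte L1 cache lines (so the CACHE(96) line is skipped, because L1_CACHE_BYTES < 64 is false), this include together with the CACHE() definition at line 571 expands at compile time into roughly:

struct cache_sizes malloc_sizes[] = {
       { .cs_size = 32 },
       { .cs_size = 64 },
       /* CACHE(96) dropped: L1_CACHE_BYTES < 64 does not hold here */
       { .cs_size = 128 },
       { .cs_size = 192 },
       { .cs_size = 256 },
       { .cs_size = 512 },
       /* ... powers of two continue up to KMALLOC_MAX_SIZE ... */
       { .cs_size = ULONG_MAX },  /* terminator, tested by the while loop at line 1483 */
};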

     

In other words, malloc_sizes[] is laid out entirely at compile time, when vmlinux is built, and at line 1457 its address is assigned to the local variable sizes. At line 1458, cache_names, a global array built in much the same way as malloc_sizes, gets the same treatment:

    static struct cache_names __initdata cache_names[] = {

    #define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },

    #include <linux/kmalloc_sizes.h>

           {NULL,}

    #undef CACHE

    };
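
Run through this second CACHE() definition, the same kmalloc_sizes.h include stringifies every size into a pair of cache names, so cache_names[] expands to roughly:

static struct cache_names __initdata cache_names[] = {
       { .name = "size-32",  .name_dma = "size-32(DMA)"  },
       { .name = "size-64",  .name_dma = "size-64(DMA)"  },
       { .name = "size-128", .name_dma = "size-128(DMA)" },
       /* ... one entry per CACHE(x) line, in the same order as malloc_sizes[] ... */
       {NULL,}
};

These are the names that later show up in /proc/slabinfo for the general-purpose caches, with the (DMA) variants used when CONFIG_ZONE_DMA is enabled.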

     

The address of cache_names is likewise stored in the local variable names. Then, at lines 1466-1509, kmem_cache_create() is called to set up a cache for every general-purpose size class. This function first decides, from its parameters, how the new cache should best be managed (for example, whether the slab descriptor is kept inside or outside the slab itself). It then allocates a cache descriptor, a struct kmem_cache, for the new cache from the cache_cache cache, roughly:

cachep = kmem_cache_zalloc(&cache_cache, gfp);

and inserts that descriptor into the cache_chain list of cache descriptors (the insertion is done while holding cache_chain_mutex, which protects the list against concurrent access). I won't go into the details here; interested readers can consult the post "slab分配器" at

    http://blog.csdn.net/yunsongice/archive/2010/01/30/5272715.aspx

together with the source code.
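
With the general caches created, a later kmalloc(size, flags) only has to walk malloc_sizes[] until it hits the first size class that is large enough; in this file the kernel does that in __find_general_cachep(). The sketch below is a self-contained, simplified imitation of that lookup; the mock table, the cache names and the find_general_cachep() wrapper are invented for illustration:

#include <limits.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures (illustration only). */
struct kmem_cache { const char *name; };

struct cache_sizes {
	size_t cs_size;
	struct kmem_cache *cs_cachep;
};

/* A mock malloc_sizes[], size-sorted and terminated by ULONG_MAX as in slab.c. */
static struct kmem_cache c32 = { "size-32" }, c64 = { "size-64" },
			 c128 = { "size-128" }, c192 = { "size-192" };
static struct cache_sizes malloc_sizes[] = {
	{ 32, &c32 }, { 64, &c64 }, { 128, &c128 }, { 192, &c192 },
	{ ULONG_MAX, NULL },
};

/* Roughly what the kernel's lookup does: scan the sorted table until the
 * first class that can hold the requested size, then use that cache. */
static struct kmem_cache *find_general_cachep(size_t size)
{
	struct cache_sizes *csizep = malloc_sizes;

	while (size > csizep->cs_size)
		csizep++;
	return csizep->cs_cachep;
}

int main(void)
{
	printf("kmalloc(100) would be served by %s\n",
	       find_general_cachep(100)->name);
	return 0;
}

The kernel version additionally handles a size of 0 and picks cs_dmacachep when GFP_DMA is set; those cases are omitted from the sketch.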

     

Once the code above has run, the general-purpose allocator, i.e. kmalloc(), is usable. That is why line 1514 can call kmalloc() to allocate an arraycache_init structure; lines 1516-1538 then replace the bootstrap per-CPU head arrays, copying the contents of the static initarray_cache and initarray_generic arrays into the freshly allocated ones and pointing cache_cache.array[] and the array of the INDEX_AC kmalloc cache at the new copies. The static bootstrap data from the earliest phase of initialization is no longer needed after this.
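
For reference, the bootstrap arrays being swapped out here are static objects declared near the top of mm/slab.c; from memory of this kernel generation they look roughly as follows (treat field order, attributes and initializer values as approximate):

struct array_cache {
       unsigned int avail;
       unsigned int limit;
       unsigned int batchcount;
       unsigned int touched;
       spinlock_t lock;
       void *entry[];
};

struct arraycache_init {
       struct array_cache cache;
       void *entries[BOOT_CPUCACHE_ENTRIES];
};

static struct arraycache_init initarray_cache __initdata =
       { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
       { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };

Because kmalloc() itself needs a per-CPU array_cache before it can hand out a single object, these statically allocated arrays break the chicken-and-egg dependency; step 4 replaces them with properly kmalloc'd ones as soon as that becomes possible.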

     

Lines 1541-1555 do the same for the node lists: for every online node, init_list() replaces the bootstrap kmem_list3 structures of cache_cache and of malloc_sizes[INDEX_AC].cs_cachep (and of the INDEX_L3 cache, when it is a separate one) with freshly kmalloc_node()'d copies, since the static initkmem_list3[] entries were only ever meant for bootstrap:

     

    static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,

                         int nodeid)

    {

           struct kmem_list3 *ptr;

     

           ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);

           BUG_ON(!ptr);

     

           memcpy(ptr, list, sizeof(struct kmem_list3));

           /*

            * Do not assume that spinlocks can be initialized via memcpy:

            */

           spin_lock_init(&ptr->list_lock);

     

           MAKE_ALL_LISTS(cachep, ptr, nodeid);

           cachep->nodelists[nodeid] = ptr;

    }

     

The last line of kmem_cache_init() sets the global variable g_cpucache_up to EARLY, and with that the bootstrap of the slab allocator is complete.
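
For context, g_cpucache_up is a small state variable in the same file that records how far the slab bootstrap has progressed; in this kernel generation it looks roughly like this (quoted from memory, with my own annotations, so treat it as approximate):

static enum {
       NONE,          /* nothing set up yet */
       PARTIAL_AC,    /* the array_cache-sized kmalloc cache exists */
       PARTIAL_L3,    /* the kmem_list3-sized kmalloc cache exists too */
       EARLY,         /* kmem_cache_init() has finished */
       FULL           /* per-CPU arrays fully enabled */
} g_cpucache_up;

EARLY therefore does not mean the allocator is fully tuned: the per-CPU arrays are still the tiny bootstrap-sized ones, and they are enlarged to their proper sizes later, in kmem_cache_init_late(), which finally moves the state on to FULL.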

     
