初始化内存管理区列表

    技术2024-07-24  58

    5.4 初始化内存管理区列表

    回到start_kernel函数,第569行调用build_all_zonelists()函数,该函数定义在mm/page_alloc.c中:

     

    2815void build_all_zonelists(void)

    2816{

    2817        set_zonelist_order();

    2818

    2819        if (system_state == SYSTEM_BOOTING) {

    2820                __build_all_zonelists(NULL);

    2821                mminit_verify_zonelist();

    2822                cpuset_init_current_mems_allowed();

    2823        } else {

    2824                /* we have to stop all cpus to guarantee there is no user

    2825                   of zonelist */

    2826                stop_machine(__build_all_zonelists, NULL, NULL);

    2827                /* cpuset refresh routine should be here */

    2828        }

    2829        vm_total_pages = nr_free_pagecache_pages();

    2830        /* ……一大堆注释*/

    2837        if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES))

    2838                page_group_by_mobility_disabled = 1;

    2839        else

    2840                page_group_by_mobility_disabled = 0;

    2841

    2842        printk("Built %i zonelists in %s order, mobility grouping %s.  "

    2843                "Total pages: %ld\n",

    2844                        nr_online_nodes,

    2845                        zonelist_order_name[current_zonelist_order],

    2846                        page_group_by_mobility_disabled ? "off" : "on",

    2847                        vm_total_pages);

    2848#ifdef CONFIG_NUMA

    2849        printk("Policy zone: %s\n", zone_names[policy_zone]);

    2850#endif

    2851}

     

    其本质上调用__build_all_zonelists(NULL)

     

    2780/* return values int ....just for stop_machine() */

    2781static int __build_all_zonelists(void *dummy)

    2782{

    2783        int nid;

    2784        int cpu;

    2785

    2786#ifdef CONFIG_NUMA

    2787        memset(node_load, 0, sizeof(node_load));

    2788#endif

    2789        for_each_online_node(nid) {

    2790                pg_data_t *pgdat = NODE_DATA(nid);

    2791

    2792                build_zonelists(pgdat);

    2793                build_zonelist_cache(pgdat);

    2794        }

    2795

    2796        /* ……一大堆注释*/

    2809        for_each_possible_cpu(cpu)

    2810                setup_pageset(&per_cpu(boot_pageset, cpu), 0);

    2811

    2812        return 0;

    2813}

     

    2789行,for_each_online_node我们已经很熟悉了:在只有一个内存节点的系统上,这个循环只执行一次。2790行取得的是最著名的pg_data_t结构,也就是NODE_DATA(0)所指向的那个结构。随后对它执行build_zonelists函数:

     

    2637static void build_zonelists(pg_data_t *pgdat)

    2638{

    2639        int j, node, load;

    2640        enum zone_type i;

    2641        nodemask_t used_mask;

    2642        int local_node, prev_node;

    2643        struct zonelist *zonelist;

    2644        int order = current_zonelist_order;

    2645

    2646        /* initialize zonelists */

    2647        for (i = 0; i < MAX_ZONELISTS; i++) {

    2648                zonelist = pgdat->node_zonelists + i;

    2649                zonelist->_zonerefs[0].zone = NULL;

    2650                zonelist->_zonerefs[0].zone_idx = 0;

    2651        }

    2652

    2653        /* NUMA-aware ordering of nodes */

    2654        local_node = pgdat->node_id;

    2655        load = nr_online_nodes;

    2656        prev_node = local_node;

    2657        nodes_clear(used_mask);

    2658

    2659        memset(node_order, 0, sizeof(node_order));

    2660        j = 0;

    2661

    2662        while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {

    2663                int distance = node_distance(local_node, node);

    2664

    2665                /*

    2666                 * If another node is sufficiently far away then it is better

    2667                 * to reclaim pages in a zone before going off node.

    2668                 */

    2669                if (distance > RECLAIM_DISTANCE)

    2670                        zone_reclaim_mode = 1;

    2671

    2672                /*

    2673                 * We don't want to pressure a particular node.

    2674                 * So adding penalty to the first node in same

    2675                 * distance group to make it round-robin.

    2676                 */

    2677                if (distance != node_distance(local_node, prev_node))

    2678                        node_load[node] = load;

    2679

    2680                prev_node = node;

    2681                load--;

    2682                if (order == ZONELIST_ORDER_NODE)

    2683                        build_zonelists_in_node_order(pgdat, node);

    2684                else

    2685                        node_order[j++] = node; /* remember order */

    2686        }

    2687

    2688        if (order == ZONELIST_ORDER_ZONE) {

    2689                /* calculate node order -- i.e., DMA last! */

    2690                build_zonelists_in_zone_order(pgdat, j);

    2691        }

    2692

    2693        build_thisnode_zonelists(pgdat);

    2694}

     

    build_zonelists函数的2647-2651行初始化NODE_DATA(0)的node_zonelists字段。我们继续走:

     

    2697static void build_zonelist_cache(pg_data_t *pgdat)

    2698{

    2699        struct zonelist *zonelist;

    2700        struct zonelist_cache *zlc;

    2701        struct zoneref *z;

    2702

    2703        zonelist = &pgdat->node_zonelists[0];

    2704        zonelist->zlcache_ptr = zlc = &zonelist->zlcache;

    2705        bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);

    2706        for (z = zonelist->_zonerefs; z->zone; z++)

    2707                zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);

    2708}

     

    build_zonelist_cache函数初始化内存管理区的缓存,我这里就不深入下去了。回到build_all_zonelists()函数中,略去调试的代码,以及设置几个关于zone的策略的全局变量的代码,该函数就结束了。

    最新回复(0)