在《大页进程页表的创建过程》一文中,论述了大页进程 fork 子进程时页表的复制过程;本文论述进程退出时,进程的大页地址空间是如何被释放的。
进程退出时,和地址空间相关的函数调用顺序如下:
sys_exit->do_exit->exit_mm->mmput->exit_mmap->unmap_vmas->unmap_hugepage_range
static int is_hugetlb_entry_hwpoisoned(pte_t pte) { swp_entry_t swp; if (huge_pte_none(pte) || pte_present(pte)) return 0; swp = pte_to_swp_entry(pte); if (non_swap_entry(swp) && is_hwpoison_entry(swp)) { return 1; } else return 0; } void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { struct mm_struct *mm = vma->vm_mm; unsigned long address; pte_t *ptep; pte_t pte; struct page *page; struct page *tmp; struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); /* * A page gathering list, protected by per file i_mmap_lock. The * lock is used to avoid list corruption from multiple unmapping * of the same page since we are using page->lru. */ LIST_HEAD(page_list); WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); 需要取消地址映射关系的起始地址和结束地址必须得是按大页对齐的 BUG_ON(end & ~huge_page_mask(h)); mmu_notifier_invalidate_range_start(mm, start, end); spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); 获得address地址对应的ptep表项 if (!ptep) continue; 如果这个表项被其它mm共享,那么就不能取消address ~ address+sz 这段虚拟地址的映射关系;除了X86外,所以ARCH的这个接口都返回0,也就是不支持共享大页; if (huge_pmd_unshare(mm, &address, ptep)) continue; /* * If a reference page is supplied, it is because a specific * page is being unmapped, not a range. Ensure the page we * are about to unmap is the actual page of interest. */ if (ref_page) { pte = huge_ptep_get(ptep); if (huge_pte_none(pte)) continue; page = pte_page(pte); if (page != ref_page) continue; /* * Mark the VMA as having unmapped its page so that * future faults in this VMA will fail rather than * looking like data was lost */ set_vma_resv_flags(vma, HPAGE_RESV_UNMAPPED); } 取消address到物理页的映射关系,同时返回原来ptep表项中的内容pte; pte = huge_ptep_get_and_clear(mm, address, ptep); if (huge_pte_none(pte)) continue; 这个HWPoisioned page是什么意思? 
/* * HWPoisoned hugepage is already unmapped and dropped reference */ if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) continue; 如果pte对应的page是个脏页,那么需要把该page一并放在page_list中在下面统一处理; page = pte_page(pte); if (pte_dirty(pte)) set_page_dirty(page); list_add(&page->lru, &page_list); } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); mmu_notifier_invalidate_range_end(mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { page_remove_rmap(page); list_del(&page->lru); put_page(page); } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct page *ref_page) { spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); __unmap_hugepage_range(vma, start, end, ref_page); spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); }