linux源码解析08–缺页异常之匿名页面

1,098次阅读
没有评论

接上篇
https://www.daodaodao123.com/?p=776

本篇解析缺页异常分支之一,匿名页面

1.缺页中断触发条件

(1)pte页表项的PRESENT没有置位
(2)pte表项为空
(3)vma->vm_ops为空(即vma_is_anonymous()成立,对应私有的匿名页)

2.应用场景

(1)局部变量较大,函数调用较深进行栈扩展
(2)malloc从堆空间分配内存,只是分配了虚拟内存空间,没有映射到物理页,第一次访问时发生。
(3)mmap分配匿名页,只是分配了虚拟内存空间,没有映射到物理页,第一次访问时发生。

3.linux内存映射的两个规律

规律1:

mmap等建立内存映射时,私有映射的vma即使带有VM_WRITE,其页表保护属性(vm_page_prot)也会被设置为只读(见下面的__P010/__P011),这样第一次写访问会触发缺页异常,从而实现写时复制
参考mm/mmap.c

/*
 * Translate a vma's vm_flags into page protection bits.
 *
 * The read/write/exec/shared bits of @vm_flags select an entry of
 * protection_map[]; that entry is OR-ed with the architecture-specific
 * bits from arch_vm_get_page_prot() and the result is passed through
 * arch_filter_pgprot() before being returned.
 */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
    /* Only these four flag bits take part in the table lookup. */
    const unsigned long prot_idx =
        vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED);
    pgprot_t base_prot = protection_map[prot_idx];
    pgprot_t arch_prot = arch_vm_get_page_prot(vm_flags);
    pgprot_t combined = __pgprot(pgprot_val(base_prot) |
                                 pgprot_val(arch_prot));

    return arch_filter_pgprot(combined);
}

/*
 * Page protection lookup table, indexed by
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED) in vm_get_page_prot().
 * The first eight entries are the __Pxxx (private) protections, the last
 * eight the __Sxxx (shared) protections.
 */
pgprot_t protection_map[16] __ro_after_init = {
    __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
    __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

/*
 * Private mappings (__P<exec><write><read>).
 * Note that every entry with the write digit set still resolves to a
 * read-only protection (PAGE_READONLY / PAGE_READONLY_EXEC), so the first
 * write to a private mapping faults.
 */
#define __P000  PAGE_NONE
#define __P001  PAGE_READONLY
#define __P010  PAGE_READONLY
#define __P011  PAGE_READONLY
#define __P100  PAGE_EXECONLY
#define __P101  PAGE_READONLY_EXEC
#define __P110  PAGE_READONLY_EXEC
#define __P111  PAGE_READONLY_EXEC

/*
 * Shared mappings (__S<exec><write><read>).
 * Here the write digit maps to PAGE_SHARED / PAGE_SHARED_EXEC instead of a
 * read-only protection.
 */
#define __S000  PAGE_NONE
#define __S001  PAGE_READONLY
#define __S010  PAGE_SHARED
#define __S011  PAGE_SHARED
#define __S100  PAGE_EXECONLY
#define __S101  PAGE_READONLY_EXEC
#define __S110  PAGE_SHARED_EXEC
#define __S111  PAGE_SHARED_EXEC

规律2

共享匿名映射走shmem ,变成shmem的文件映射.
参见mm/mmap.c-->mmap_region()

/*
 * Excerpt of mmap_region() (elided parts are marked with "..."): shows how
 * the new vma is set up depending on whether the mapping is file-backed,
 * shared anonymous, or private anonymous.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
        unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
        struct list_head *uf)
{
...
    if (file) {   /// file-backed mapping
        vma->vm_file = get_file(file);
        error = call_mmap(file, vma);
        if (error)
            goto unmap_and_free_vma;
        addr = vma->vm_start;
...
        vm_flags = vma->vm_flags;
    } else if (vm_flags & VM_SHARED) {   /// shared mapping without a file
        error = shmem_zero_setup(vma);   /// shared anonymous mapping: attach shmem's vma operations (same as IPC shared memory)
        if (error)
            goto free_vma;
    } else {
        vma_set_anonymous(vma);  /// private anonymous mapping
    }

...
}

4.私有匿名缺页

私有匿名缺页,处理流程图如下:
linux源码解析08–缺页异常之匿名页面

源码解析:

/*
 * Handle a page fault on a private anonymous mapping whose pte is not yet
 * populated: a read fault maps the zero page (unless the mm forbids it),
 * any other fault allocates a fresh zeroed page and maps it.
 *
 * We enter with non-exclusive mmap_lock (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_lock still held, but pte unmapped and unlocked.
 *
 * Returns 0 or the check_stable_address_space()/handle_userfault() result,
 * VM_FAULT_SIGBUS for a shared vma, VM_FAULT_OOM on allocation failure.
 */
static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
    struct vm_area_struct *vma = vmf->vma;
    struct page *page;
    vm_fault_t ret = 0;
    pte_t entry;

    /* File mapping without ->vm_ops ? */
    if (vma->vm_flags & VM_SHARED)    /// Keep shared vmas out of the anonymous fault path; this function only handles private anonymous mappings
        return VM_FAULT_SIGBUS;

    /*
     * Use pte_alloc() instead of pte_alloc_map().  We can't run
     * pte_offset_map() on pmds where a huge pmd might be created
     * from a different thread.
     *
     * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
     * parallel threads are excluded by other means.
     *
     * Here we only have mmap_read_lock(mm).
     */
    if (pte_alloc(vma->vm_mm, vmf->pmd)) /// Allocate a pte page table and install it in the pmd
        return VM_FAULT_OOM;

    /* See comment in handle_pte_fault() */
    if (unlikely(pmd_trans_unstable(vmf->pmd)))
        return 0;

/// Read fault (and the mm does not forbid the zero page): map the zero page instead of allocating memory
    /* Use the zero-page for reads */
    if (!(vmf->flags & FAULT_FLAG_WRITE) &&
            !mm_forbids_zeropage(vma->vm_mm)) {
        entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), /// my_zero_pfn() yields the page frame number of the zero page
                        vma->vm_page_prot));
        vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,  /// Map the pte entry and take its page-table lock
                vmf->address, &vmf->ptl);
        if (!pte_none(*vmf->pte)) {
            update_mmu_tlb(vma, vmf->address, vmf->pte);
            goto unlock;
        }
        ret = check_stable_address_space(vma->vm_mm);
        if (ret)
            goto unlock;
        /* Deliver the page fault to userland, check inside PT lock */
        if (userfaultfd_missing(vma)) {
            pte_unmap_unlock(vmf->pte, vmf->ptl);
            return handle_userfault(vmf, VM_UFFD_MISSING);
        }
        goto setpte;  /// Read (zero-page) case is prepared; jump to setpte to install the entry
    }

/// Write fault, or the zero page is not allowed: allocate a private page
    /* Allocate our own private page. */
    if (unlikely(anon_vma_prepare(vma)))  /// Prepare the anon_vma needed for reverse mapping (rmap)
        goto oom;
    page = alloc_zeroed_user_highpage_movable(vma, vmf->address);  /// Allocate a zeroed, movable anonymous page, preferring highmem (arm64 has no highmem)
    if (!page)
        goto oom;

    if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
        goto oom_free_page;
    cgroup_throttle_swaprate(page, GFP_KERNEL);

    /*
     * The memory barrier inside __SetPageUptodate makes sure that
     * preceding stores to the page contents become visible before
     * the set_pte_at() write.
     */
    __SetPageUptodate(page); /// Mark the page up to date; this carries the memory barrier described above

    entry = mk_pte(page, vma->vm_page_prot);  /// Build the pte entry for the new page
    if (vma->vm_flags & VM_WRITE)
        entry = pte_mkwrite(pte_mkdirty(entry));  /// Mark the entry dirty and writable

    vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,  /// Map the pte entry and take the page-table lock before inspecting/modifying it
            &vmf->ptl);
    if (!pte_none(*vmf->pte)) {
        update_mmu_cache(vma, vmf->address, vmf->pte);
        goto release;
    }

    ret = check_stable_address_space(vma->vm_mm);
    if (ret)
        goto release;

    /* Deliver the page fault to userland, check inside PT lock */
    if (userfaultfd_missing(vma)) {
        pte_unmap_unlock(vmf->pte, vmf->ptl);
        put_page(page);
        return handle_userfault(vmf, VM_UFFD_MISSING);
    }

    inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);             /// Bump the process's anonymous page counter
    page_add_new_anon_rmap(page, vma, vmf->address, false);    /// Add the new anonymous page to the rmap
    lru_cache_add_inactive_or_unevictable(page, vma);          /// Add the new anonymous page to the LRU
setpte:
    set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);   /// Write the entry into the page table

    /* No need to invalidate - it was non-present before */
    update_mmu_cache(vma, vmf->address, vmf->pte);
unlock:
    pte_unmap_unlock(vmf->pte, vmf->ptl);
    return ret;
release:
    /* The pte was already populated or the fault is being aborted: drop the page allocated above. */
    put_page(page);
    goto unlock;
oom_free_page:
    put_page(page);
oom:
    return VM_FAULT_OOM;
}

5.系统零页

补充一个说明:系统零页是一个只读的物理页面,内容全为零。在一些只读且要求内容为0的场景下,内核不分配新的物理页面,而是把这些虚拟页全部映射到系统零页,可以节省大量内存,并大幅提高性能。一个常见的场景是配合写时复制使用:读访问时先映射零页,等到第一次写入时再分配新的物理页面。

应用程序使用malloc()分配虚拟内存后,三种情况:
(1)直接读,linux内核进入缺页异常,调用do_anonymous_page函数使用零页映射,此时PTE属性只读;
(2)先读后写,linux内核第一次(读)触发缺页异常,映射零页,PTE只读;第二次(写)触发写保护异常,走写时复制,分配一个新的物理页面替换零页;
(3)直接写,linux内核进入匿名页面的缺页异常,调用alloc_zeroed_user_highpage_movable分配一个新页面,这个PTE是可写的;

正文完
 2
admin
版权声明:本站原创文章,由 admin 于2022-03-26发表,共计4937字。
转载说明:除特殊说明外本站文章皆由CC-4.0协议发布,转载请注明出处。
评论(没有评论)
粤ICP备2021172357号-1