做人呢,最紧要就系开心啦

linux源码解析08–缺页异常之匿名页面

934次阅读
没有评论

接上篇
https://www.daodaodao123.com/?p=776

本篇解析缺页异常分支之一,匿名页面

1. 缺页中断触发条件

(1)pte 页表项的 PRESENT 没有置位
(2)pte 表项为空
(3)vma->vm_ops 为空(即 vma_is_anonymous() 成立,对应私有的匿名页)

2. 应用场景

(1)局部变量较大或函数调用层次较深,导致栈扩展
(2)malloc 从堆空间分配内存,只是分配了虚拟内存空间,没有映射到物理页,第一次访问时发生。
(3)mmap 分配匿名页,只是分配了虚拟内存空间,没有映射到物理页,第一次访问时发生。

3.linux 内存映射的两个规律

规律 1:

mmap 等内存映射,会将私有(private)vma 的页保护属性设置为只读:即使 vma 本身带有可写权限,对应的 __P010、__P011 等表项也是 PAGE_READONLY,这样第一次写访问会触发异常,从而实现写时复制。
参考 mm/mmap.c

/*
 * Build the page protection value for a mapping from its vm_flags.
 *
 * The VM_READ|VM_WRITE|VM_EXEC|VM_SHARED bits of vm_flags select one entry
 * of protection_map[]. That value is OR-ed with whatever
 * arch_vm_get_page_prot() contributes for the same flags, and the result is
 * passed through arch_filter_pgprot() before being returned.
 */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
    pgprot_t ret = __pgprot(pgprot_val(protection_map[vm_flags &
                (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
            pgprot_val(arch_vm_get_page_prot(vm_flags)));

    return arch_filter_pgprot(ret);
}

/*
 * Lookup table used by vm_get_page_prot(): indexed by
 * vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED).
 * The first eight entries are the __Pxxx values, the last eight the __Sxxx
 * values.
 * NOTE(review): presumably P = private and S = shared (selected by the
 * VM_SHARED bit) -- confirm against the flag bit values.
 */
pgprot_t protection_map[16] __ro_after_init = {
    __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
    __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
};

/*
 * __Pxxx entries of protection_map[].
 * None of them resolves to a PAGE_SHARED* value: __P010/__P011 are
 * PAGE_READONLY and __P110/__P111 are PAGE_READONLY_EXEC.
 * NOTE(review): presumably the three digits are the exec/write/read bits,
 * so the x1x entries are the writable private mappings -- confirm.
 */
#define __P000  PAGE_NONE
#define __P001  PAGE_READONLY
#define __P010  PAGE_READONLY
#define __P011  PAGE_READONLY
#define __P100  PAGE_EXECONLY
#define __P101  PAGE_READONLY_EXEC
#define __P110  PAGE_READONLY_EXEC
#define __P111  PAGE_READONLY_EXEC

/*
 * __Sxxx entries of protection_map[].
 * They differ from the __Pxxx entries only in the x1x slots, which use
 * PAGE_SHARED / PAGE_SHARED_EXEC instead of the read-only variants.
 */
#define __S000  PAGE_NONE
#define __S001  PAGE_READONLY
#define __S010  PAGE_SHARED
#define __S011  PAGE_SHARED
#define __S100  PAGE_EXECONLY
#define __S101  PAGE_READONLY_EXEC
#define __S110  PAGE_SHARED_EXEC
#define __S111  PAGE_SHARED_EXEC

规律 2

共享 匿名映射 走 shmem,变成shmem 的文件映射.
参见 mm/mmap.c-->mmap_region()

/*
 * Excerpt of mmap_region(); elided parts are marked "...".
 * It shows the three ways the new vma is set up: file-backed, shared
 * without a file, and everything else (private anonymous).
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
        unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
        struct list_head *uf)
{
...
    if (file) {   /// file-backed mapping
        vma->vm_file = get_file(file);
        error = call_mmap(file, vma);
        if (error)
            goto unmap_and_free_vma;
        addr = vma->vm_start;
...
        vm_flags = vma->vm_flags;
    } else if (vm_flags & VM_SHARED) {   /// shared mapping without a file
        error = shmem_zero_setup(vma);   /// shared anonymous mapping: attach shmem's vma operations (same as IPC shared memory)
        if (error)
            goto free_vma;
    } else {vma_set_anonymous(vma);  /// private anonymous mapping
    }

...
}

4. 私有匿名缺页

私有匿名缺页,处理流程图如下:
linux 源码解析 08–缺页异常之匿名页面

源码解析:

/*
 * Handle a fault on a private anonymous mapping whose pte is not populated.
 * A non-write fault maps the zero page (unless the mm forbids it); otherwise
 * a newly allocated zeroed page is charged, added to rmap and the LRU, and
 * mapped.
 *
 * Returns 0 when the pte was installed, when the pmd was found unstable, or
 * when the pte turned out to be populated already; VM_FAULT_SIGBUS for a
 * VM_SHARED vma; VM_FAULT_OOM when an allocation or the memcg charge fails;
 * the non-zero result of check_stable_address_space(); or the result of
 * handle_userfault() when userfaultfd_missing(vma) is true.
 *
 * We enter with non-exclusive mmap_lock (to exclude vma changes,
 * but allow concurrent faults), and pte mapped but not yet locked.
 * We return with mmap_lock still held, but pte unmapped and unlocked.
 */
static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
{
    struct vm_area_struct *vma = vmf->vma;
    struct page *page;
    vm_fault_t ret = 0;
    pte_t entry;

    /* File mapping without ->vm_ops ? */
    if (vma->vm_flags & VM_SHARED)    /// keep shared vmas out of the anonymous fault path; only private anonymous mappings are handled here
        return VM_FAULT_SIGBUS;

    /*
     * Use pte_alloc() instead of pte_alloc_map().  We can't run
     * pte_offset_map() on pmds where a huge pmd might be created
     * from a different thread.
     *
     * pte_alloc_map() is safe to use under mmap_write_lock(mm) or when
     * parallel threads are excluded by other means.
     *
     * Here we only have mmap_read_lock(mm).
     */
    if (pte_alloc(vma->vm_mm, vmf->pmd)) /// allocate the pte table and install it in the pmd; failure is reported as OOM
        return VM_FAULT_OOM;

    /* See comment in handle_pte_fault() */
    if (unlikely(pmd_trans_unstable(vmf->pmd)))
        return 0;

/// read fault (and the mm allows the zero page): map the zero page instead of allocating
    /* Use the zero-page for reads */
    if (!(vmf->flags & FAULT_FLAG_WRITE) &&
            !mm_forbids_zeropage(vma->vm_mm)) {entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), /// my_zero_pfn() yields the page frame number of the zero page
                        vma->vm_page_prot));
        vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,  /// map the pte entry and take the page-table lock (vmf->ptl)
                vmf->address, &vmf->ptl);
        /* The pte is already populated: nothing to install, just unlock. */
        if (!pte_none(*vmf->pte)) {update_mmu_tlb(vma, vmf->address, vmf->pte);
            goto unlock;
        }
        ret = check_stable_address_space(vma->vm_mm);
        if (ret)
            goto unlock;
        /* Deliver the page fault to userland, check inside PT lock */
        if (userfaultfd_missing(vma)) {pte_unmap_unlock(vmf->pte, vmf->ptl);
            return handle_userfault(vmf, VM_UFFD_MISSING);
        }
        goto setpte;  /// read (zero-page) case is done, jump to setpte
    }

/// remaining case: write fault, or the zero page is forbidden for this mm
    /* Allocate our own private page. */
    if (unlikely(anon_vma_prepare(vma)))  /// prepare for setting up the reverse mapping (rmap)
        goto oom;
    page = alloc_zeroed_user_highpage_movable(vma, vmf->address);  /// allocate a zeroed, movable anonymous page; highmem is preferred where it exists (author's note: arm64 has no highmem)
    if (!page)
        goto oom;

    /* Charge failure drops the page reference and reports OOM. */
    if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
        goto oom_free_page;
    cgroup_throttle_swaprate(page, GFP_KERNEL);

    /*
     * The memory barrier inside __SetPageUptodate makes sure that
     * preceding stores to the page contents become visible before
     * the set_pte_at() write.
     */
    __SetPageUptodate(page); /// includes the memory barrier described above

    entry = mk_pte(page, vma->vm_page_prot);  /// build the pte value for the new page
    if (vma->vm_flags & VM_WRITE)
        entry = pte_mkwrite(pte_mkdirty(entry));  /// mark the pte dirty and writable

    vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,  /// map the pte entry and take the page-table lock (vmf->ptl)
            &vmf->ptl);
    /*
     * The pte is already populated: drop our page and unlock.
     * NOTE(review): the zero-page path above calls update_mmu_tlb() in the
     * same situation while this path calls update_mmu_cache() -- confirm
     * against the kernel version being quoted.
     */
    if (!pte_none(*vmf->pte)) {update_mmu_cache(vma, vmf->address, vmf->pte);
        goto release;
    }

    ret = check_stable_address_space(vma->vm_mm);
    if (ret)
        goto release;

    /* Deliver the page fault to userland, check inside PT lock */
    if (userfaultfd_missing(vma)) {pte_unmap_unlock(vmf->pte, vmf->ptl);
        put_page(page);
        return handle_userfault(vmf, VM_UFFD_MISSING);
    }

    inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);             /// bump the mm's anonymous page counter
    page_add_new_anon_rmap(page, vma, vmf->address, false);    /// add the anonymous page to rmap
    lru_cache_add_inactive_or_unevictable(page, vma);          /// add the anonymous page to the LRU
setpte:
    set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);   /// write the pte entry into the page table

    /* No need to invalidate - it was non-present before */
    update_mmu_cache(vma, vmf->address, vmf->pte);
unlock:
    pte_unmap_unlock(vmf->pte, vmf->ptl);
    return ret;
release:
    /* Drop the page that was allocated but not mapped, then unlock. */
    put_page(page);
    goto unlock;
oom_free_page:
    put_page(page);
oom:
    return VM_FAULT_OOM;
}

5. 系统零页

补充说明:系统零页是一个只读的物理页面,内容全为 0。在只读且要求内容为 0 的场景下,内核不分配新的物理页面,而是把这些虚拟页全部映射到系统零页,这样可以节省大量内存并提高性能。一个常见的应用场景是写时复制。

应用程序使用 malloc()分配虚拟内存后,三种情况:
(1)直接读,linux 内核进入缺页异常,调用 do_anonymous_page 函数使用零页映射,此时 PTE 属性只读;
(2)先读后写,linux 内核第一次触发缺页异常,映射零页;第二次触发异常,触发写时复制;
(3)直接写,linux 内核进入匿名页面的缺页异常,调用 alloc_zeroed_user_highpage_movable 分配一个新页面,这个 PTE 是可写的;

正文完
 2
admin
版权声明:本站原创文章,由 admin 2022-03-26发表,共计4937字。
转载说明:除特殊说明外本站文章皆由CC-4.0协议发布,转载请注明出处。
评论(没有评论)