Linux 初始化过程页表建立
Linux 初始化过程,会依次建立如下页表映射:
1. 恒等映射:页表基地址 idmap_pg_dir;
2. 粗粒度内核镜像映射:即上篇博文里的“第二次建立页表映射”,页表基地址 init_pg_dir;
3. fixmap 映射:页表基地址为 init_pg_dir,待 paging_init 之后为 swapper_pg_dir;
4. 细粒度内核镜像映射:页表基地址为 swapper_pg_dir;
5. 线性映射:页表基地址为 swapper_pg_dir;
6. 用户空间页表映射:页表基地址 task->mm->pgd;
上篇已经解析了 1 和 2 映射,这篇解析fixmap 映射。
fixmap 映射的由来:
建立了恒等映射和粗粒度内核镜像映射之后,只能保证内核镜像本身可以正常访问;此时内存管理子系统尚未建立,如果想访问 bootloader 传入的 dtb 或其他 IO 设备,仍然无法实现,因此 Linux 提出了 fixmap。
fixmap:将一段 固定虚拟地址 映射到 dtb,以及想要访问的 IO 设备地址(比如串口,用于早期的打印调试);
1.fixmap 映射
源码参考 arch/arm64/kernel/setup.c 文件:
start_kernel()
->setup_arch()
->early_fixmap_init()
->early_ioremap_init()
->setup_machine_fdt(__fdt_pointer)
early_fixmap_init 函数
/*
 * early_fixmap_init - hook the fixmap page tables (bm_pud, bm_pmd, bm_pte)
 * into the kernel page table for the region starting at FIXADDR_START.
 *
 * The function walks pgd -> p4d -> pud -> pmd for FIXADDR_START. An empty
 * p4d entry is pointed at bm_pud, an empty pud entry is pointed at bm_pmd,
 * and the pmd entry is unconditionally pointed at bm_pte. The tables are
 * referenced by symbol via __pa_symbol().
 *
 * Afterwards it verifies, at build time and again at run time, that
 * FIX_BTMAP_BEGIN and FIX_BTMAP_END resolve to the same pmd entry as
 * FIXADDR_START; a run-time mismatch triggers a warning plus diagnostics.
 */
void __init early_fixmap_init(void)
{
pgd_t *pgdp;
p4d_t *p4dp, p4d;
pud_t *pudp;
pmd_t *pmdp;
unsigned long addr = FIXADDR_START;
pgdp = pgd_offset_k(addr); /// look up the pgd entry covering addr
p4dp = p4d_offset(pgdp, addr);
p4d = READ_ONCE(*p4dp);
/*
 * Because of short-circuit evaluation, p4d_page_paddr() is only evaluated
 * when CONFIG_PGTABLE_LEVELS > 3 and the p4d entry is not empty. This
 * branch is taken when the entry already points at something other than
 * bm_pud.
 */
if (CONFIG_PGTABLE_LEVELS> 3 &&
!(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) {
/*
* We only end up here if the kernel mapping and the fixmap
* share the top level pgd entry, which should only happen on
* 16k/4 levels configurations.
*/
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
pudp = pud_offset_kimg(p4dp, addr);
} else {if (p4d_none(p4d))
__p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); /// fill the p4d entry with bm_pud
pudp = fixmap_pud(addr); /// get the pud entry
}
if (pud_none(READ_ONCE(*pudp)))
__pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); /// fill the pud entry with bm_pmd
pmdp = fixmap_pmd(addr);
__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); /// fill the pmd entry with bm_pte
/*
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
/*
 * Run-time counterpart of the build-time check above: both ends of the
 * boot-ioremap range must use the pmd entry that was just populated.
 */
if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
|| pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {WARN_ON(1);
pr_warn("pmdp %p != %p, %p\n",
pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
fix_to_virt(FIX_BTMAP_BEGIN));
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
fix_to_virt(FIX_BTMAP_END));
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
}
}
early_ioremap_init 函数(arm64 上该函数直接调用下面的 early_ioremap_setup)
/*
 * early_ioremap_setup - prepare the boot-time ioremap slots.
 *
 * Warns (once, on the first offender) if any slot already has a mapping
 * recorded in prev_map[], then precomputes the virtual address of every
 * slot into slot_virt[].
 */
void __init early_ioremap_setup(void)
{
	int slot = 0;

	/* Stop scanning at the first slot that unexpectedly holds a mapping. */
	while (slot < FIX_BTMAPS_SLOTS) {
		if (WARN_ON(prev_map[slot]))
			break;
		slot++;
	}

	/*
	 * Slot n starts NR_FIX_BTMAPS*n fixmap indices below FIX_BTMAP_BEGIN;
	 * translate that index into its virtual address.
	 */
	for (slot = 0; slot < FIX_BTMAPS_SLOTS; slot++) {
		slot_virt[slot] =
			__fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot);
	}
}
上述步骤只是填充好页表、并预先计算出各个 slot 对应的虚拟地址;真正的映射关系是在 early_ioremap 函数中建立的;
early_ioremap 函数
/*
 * __early_ioremap - map a physical range into a free boot-time fixmap slot.
 * @phys_addr: start of the physical range (need not be page aligned)
 * @size:      length of the range in bytes (must be non-zero)
 * @prot:      page protection passed to the fixmap setter
 *
 * Picks the first slot with no mapping recorded in prev_map[], maps the
 * page-aligned range covering [phys_addr, phys_addr + size) one page at a
 * time, records the result in prev_map[] and the caller's size in
 * prev_size[].
 *
 * Returns the virtual address corresponding to @phys_addr (slot base plus
 * the in-page offset), or NULL if no slot is free, the size is zero, the
 * range wraps around, or it needs more than NR_FIX_BTMAPS pages.
 */
static void __init __iomem *
__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
{
	unsigned long page_off;
	resource_size_t end_addr;
	unsigned int pages_left;
	enum fixed_addresses fix_idx;
	void __iomem *vaddr;
	int n, slot = -1;

	WARN_ON(system_state >= SYSTEM_RUNNING);

	/* Claim the first slot that has no mapping recorded. */
	for (n = 0; n < FIX_BTMAPS_SLOTS && slot < 0; n++) {
		if (!prev_map[n])
			slot = n;
	}

	if (WARN(slot < 0, "%s(%pa, %08lx) not found slot\n",
		 __func__, &phys_addr, size))
		return NULL;

	/* Reject zero-sized requests and ranges that wrap around. */
	end_addr = phys_addr + size - 1;
	if (WARN_ON(!size || end_addr < phys_addr))
		return NULL;

	/* Record the caller's size before it is rounded up to whole pages. */
	prev_size[slot] = size;

	/*
	 * Mappings are page granular: keep the in-page offset aside, align
	 * the start down and extend the length to the next page boundary.
	 */
	page_off = offset_in_page(phys_addr);
	phys_addr &= PAGE_MASK;
	size = PAGE_ALIGN(end_addr + 1) - phys_addr;

	/* The whole range has to fit into a single slot. */
	pages_left = size >> PAGE_SHIFT;
	if (WARN_ON(pages_left > NR_FIX_BTMAPS))
		return NULL;

	/*
	 * Install one page per fixmap index, walking the indices downwards
	 * from the first index of the chosen slot.
	 */
	fix_idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
	for (; pages_left > 0; --pages_left, --fix_idx, phys_addr += PAGE_SIZE) {
		if (after_paging_init)
			__late_set_fixmap(fix_idx, phys_addr, prot);
		else
			__early_set_fixmap(fix_idx, phys_addr, prot);
	}

	/*
	 * Note: by now phys_addr has been page aligned and advanced past the
	 * last mapped page, so that is the value this debug message prints.
	 */
	WARN(early_ioremap_debug, "%s(%pa, %08lx) [%d] => %08lx + %08lx\n",
	     __func__, &phys_addr, size, slot, page_off, slot_virt[slot]);

	/* Hand back the slot's base address plus the original in-page offset. */
	vaddr = (void __iomem *)(page_off + slot_virt[slot]);
	prev_map[slot] = vaddr;
	return vaddr;
}
2.earlycon 映射
在 drivers/tty/serial/earlycon.c 文件中,earlycon_map 函数实际完成 ioremap 映射:
/*
 * earlycon_map - obtain a virtual address for the early console device.
 * @paddr: physical address of the device
 * @size:  length of the region; only used by the ioremap() path
 *
 * With CONFIG_FIX_EARLYCON_MEM the page containing @paddr is installed in
 * the FIX_EARLYCON_MEM_BASE fixmap slot and the returned address is that
 * slot's virtual address plus the in-page offset of @paddr. Otherwise the
 * region is mapped with ioremap().
 *
 * Returns the mapped address, or NULL after logging an error.
 */
static void __iomem * __init earlycon_map(resource_size_t paddr, size_t size)
{
	void __iomem *base;

#ifdef CONFIG_FIX_EARLYCON_MEM
	set_fixmap_io(FIX_EARLYCON_MEM_BASE, paddr & PAGE_MASK);
	/* Slot address plus the offset of paddr within its page. */
	base = (void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE) +
	       (paddr & ~PAGE_MASK);
#else
	base = ioremap(paddr, size);
#endif
	if (base)
		return base;

	pr_err("%s: Couldn't map %pa\n", __func__, &paddr);
	return NULL;
}
3.dtb 映射
设备树,通过虚拟地址获取内存信息和板级信息;
setup_machine_fdt(__fdt_pointer);/// 设备树映射
/*
 * fixmap_remap_fdt - map the flattened device tree at its fixmap address.
 * @dt_phys: physical address of the FDT
 * @size:    out parameter; receives the total size read from the FDT header
 * @prot:    page protection used for the mapping
 *
 * Maps the SWAPPER_BLOCK_SIZE-aligned block containing @dt_phys at the
 * FIX_FDT virtual address, validates the header magic, reads the total
 * size, and maps again with a larger length if the FDT extends past the
 * first block.
 *
 * Returns the virtual address of the FDT, or NULL when @dt_phys is zero or
 * not MIN_FDT_ALIGN aligned, the magic does not match, or the size exceeds
 * MAX_FDT_SIZE. Note that *@size is already written when the size check
 * fails.
 */
void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{const u64 dt_virt_base = __fix_to_virt(FIX_FDT); /// virtual address of the FIX_FDT fixmap slot
int offset;
void *dt_virt;
/*
* Check whether the physical FDT address is set and meets the minimum
* alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
* at least 8 bytes so that we can always access the magic and size
* fields of the FDT header after mapping the first chunk, double check
* here if that is indeed the case.
*/
BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
return NULL;
/*
* Make sure that the FDT region can be mapped without the need to
* allocate additional translation table pages, so that it is safe
* to call create_mapping_noalloc() this early.
*
* On 64k pages, the FDT will be mapped using PTEs, so we need to
* be in the same PMD as the rest of the fixmap.
* On 4k pages, we'll use section mappings for the FDT so we only
* have to be in the same PUD.
*/
BUILD_BUG_ON(dt_virt_base % SZ_2M);
BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
__fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
/* Offset of the FDT inside its SWAPPER_BLOCK_SIZE-aligned block. */
offset = dt_phys % SWAPPER_BLOCK_SIZE;
dt_virt = (void *)dt_virt_base + offset;
/* map the first chunk so we can read the size from the header */
create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), /// create the mapping using page tables that already exist; none can be allocated here (the buddy allocator is not running yet)
dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
if (fdt_magic(dt_virt) != FDT_MAGIC)
return NULL;
*size = fdt_totalsize(dt_virt);
if (*size> MAX_FDT_SIZE)
return NULL;
/*
 * The FDT runs past the first block: map again from the same base with
 * the length rounded up to cover the whole FDT.
 */
if (offset + *size> SWAPPER_BLOCK_SIZE)
create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
return dt_virt;
}
建立映射后,即可以通过虚拟地址访问 dtb 内容;
内核通过 dtb 收集了内存布局信息后,会通过 memblock 模块进行管理。最后资源保存在 memblock 的 memory type 数组中;
该部分内容会用在后续的内存子系统建立中(伙伴系统初始化);
请问大佬,用的内核版本是哪个呢?
@anson 最开始用的5.13,最近迁移到了5.15,有兴趣也可以参考我的仓库https://github.com/luteresa/linux.git,希望对你有点帮助
@雅克 好的,多谢!对了,大佬有公众号或微信群吗?
@anson 暂时还没有,等我把内存的东西整理完,可能会开个号。先用这个吧,这个站一直会在。