Linux Virtual Memory Split with 3.5G/0.5G
问题
项目里用到的一款开发板,配置的是 32 位 ARM 处理器。因为业务程序相关方面的实际开销,需要在用户态虚拟地址空间里使用到大量内存,实际可能要超过 3GB,这引起了内存分配上的错误。
内核相关模块
Linux kernel 里,会把 4GB 的虚拟地址空间,划分成用户和内核两个部分,一般低地址是用户虚拟地址空间,高地址是内核虚拟地址空间(至于这两个部分里更具体的划分情况,由于跟这里要解决的问题关系不大,就不展开了),如下图:
+---------------+ <---- 4G
| |
| Kernel Space |
| |
+---------------+ <---- PAGE_OFFSET
| |
| |
| User Space |
| |
| |
+---------------+ <---- 0
Linux 内核的配置(Kconfig)里有个选项 “Memory Split”,用来配置 4GB 的虚拟地址空间里,用户和内核虚拟地址空间各自的范围:
choice
prompt "Memory split"
default VMSPLIT_3G
help
Select the desired split between kernel and user memory.
If you are not absolutely sure what you are doing, leave this
option alone!
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
config VMSPLIT_1G
bool "1G/3G user/kernel split"
endchoice
config PAGE_OFFSET
hex
default 0x40000000 if VMSPLIT_1G
default 0x80000000 if VMSPLIT_2G
default 0xC0000000
一共有三种选择,结合 PAGE_OFFSET 的定义,表示的含义是这样的:
- 比如 VMSPLIT_3G 表示用户虚拟地址空间大小是 3GB,范围是 [0, 3G);内核虚拟地址空间大小是 1GB,范围是 [3G, 4G)。VMSPLIT_2G 和 VMSPLIT_1G 相应的是另外两种划分。
通常情况下,默认是用 VMSPLIT_3G。如果内核虚拟地址空间里开销比较大,可以选择另外两个,这就需要缩小用户虚拟地址空间的范围。
针对项目实际情况,调整虚拟内存地址范围
目前项目里遇到的情况,即使 3GB 的用户虚拟地址空间,仍然不够用。
在已知选项无法满足实际需求的情况下,准备动手修改内核的实现,重新划分范围。以下是 3.5G/0.5G 的实现(针对的是 3.x 版本):
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1797,25 +1797,28 @@ choice
help
Select the desired split between kernel and user memory.
If you are not absolutely sure what you are doing, leave this
option alone!
+ config VMSPLIT_35G
+ bool "3.5G/0.5G user/kernel split"
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
config VMSPLIT_1G
bool "1G/3G user/kernel split"
endchoice
config PAGE_OFFSET
hex
default 0x40000000 if VMSPLIT_1G
default 0x80000000 if VMSPLIT_2G
- default 0xC0000000
+ default 0xC0000000 if VMSPLIT_3G
+ default 0xE0000000
config NR_CPUS
int "Maximum number of CPUs (2-32)"
range 2 32
depends on SMP
default "4"
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -44,14 +44,20 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
memset(new_pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
/*
* Copy over the kernel and IO PGD entries
*/
init_pgd = pgd_offset_k(0);
+
+#if defined(CONFIG_VMSPLIT_35G) && defined(CONFIG_ARM_LPAE)
+ memset(new_pgd + USER_PTRS_PER_PGD, 0,
+ (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+#else
memcpy(new_pgd + USER_PTRS_PER_PGD, init_pgd + USER_PTRS_PER_PGD,
(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+#endif
clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t));
#ifdef CONFIG_ARM_LPAE
/*
* Allocate PMD table for modules and pkmap mappings.
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -113,25 +113,33 @@ ENDPROC(cpu_v7_set_pte_ext)
ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP << 16)
/*
* TTBR0/TTBR1 split (PAGE_OFFSET):
* 0x40000000: T0SZ = 2, T1SZ = 0 (not used)
* 0x80000000: T0SZ = 0, T1SZ = 1
* 0xc0000000: T0SZ = 0, T1SZ = 2
+ * 0xe0000000: T0SZ = 0, T1SZ = 3
*
* Only use this feature if PHYS_OFFSET <= PAGE_OFFSET, otherwise
* booting secondary CPUs would end up using TTBR1 for the identity
* mapping set up in TTBR0.
*/
bhi 9001f @ PHYS_OFFSET > PAGE_OFFSET?
+#if defined CONFIG_VMSPLIT_35G
+ orr \tmp, \tmp, #0x00030000 @ TTBCR.T1SZ
+#else
orr \tmp, \tmp, #(((PAGE_OFFSET >> 30) - 1) << 16) @ TTBCR.T1SZ
+#endif
#if defined CONFIG_VMSPLIT_2G
/* PAGE_OFFSET == 0x80000000, T1SZ == 1 */
add \ttbr1, \ttbr1, #1 << 4 @ skip two L1 entries
#elif defined CONFIG_VMSPLIT_3G
/* PAGE_OFFSET == 0xc0000000, T1SZ == 2 */
add \ttbr1, \ttbr1, #4096 * (1 + 3) @ only L2 used, skip pgd+3*pmd
+#elif defined CONFIG_VMSPLIT_35G
+ /* PAGE_OFFSET == 0xe0000000, T1SZ == 3 */
+ add \ttbr1, \ttbr1, #2048 * (2 + 7) @ only L2 used, skip pgd+(3+1/2)*pmd
#endif
/* CONFIG_VMSPLIT_1G does not need TTBR1 adjustment */
9001: mcr p15, 0, \tmp, c2, c0, 2 @ TTB control register
mcrr p15, 1, \ttbr1, \zero, c2 @ load TTBR1
.endm
这个实现里包括了以下几个方面:
- 添加了一个选项叫 VMSPLIT_35G,是 3.5G/0.5G 的范围划分;
- 相应的 PAGE_OFFSET 需要配置为 0xE0000000(也就是 3.5G),即用户虚拟地址空间的范围是 [0, 0xE0000000),内核虚拟地址空间的范围是 [0xE0000000, 4G);
- 寄存器 ttbr1 指向新的页目录里的位置(或者说是偏移),因为内核虚拟地址入口调整后,对应页目录里的位置会发生变化;当然,因为用户虚拟地址入口没有变化,所以 ttbr0 不需要调整。
应用这个补丁后,在项目的 defconfig 配置文件里选中这里的 VMSPLIT_35G,重新编译生成内核即可。