Lab pgtbl: page tables
Speed up system calls (easy)
Some operating systems (e.g., Linux) speed up certain system calls by sharing data in a read-only region between userspace and the kernel. This eliminates the need for kernel crossings when performing these system calls. To help you learn how to insert mappings into a page table, your first task is to implement this optimization for the getpid()
system call in xv6.
When each process is created, map one read-only page at USYSCALL (a VA defined in memlayout.h
). At the start of this page, store a struct usyscall
(also defined in memlayout.h
), and initialize it to store the PID of the current process. For this lab, ugetpid()
has been provided on the userspace side and will automatically use the USYSCALL mapping. You will receive full credit for this part of the lab if the ugetpid
test case passes when running pgtbltest
.
Some hints:
- You can perform the mapping in proc_pagetable() in kernel/proc.c.
- Choose permission bits that allow userspace to only read the page.
- You may find that mappages() is a useful utility.
- Don't forget to allocate and initialize the page in allocproc().
- Make sure to free the page in freeproc().
题目要求通过共享页面加速系统调用
kernel/proc.h
添加可被共享的页面
// Per-process state
struct proc {
...
struct trapframe *trapframe; // data page for trampoline.S
struct usyscall *usyscall; // page shared with user space; mapped at USYSCALL by proc_pagetable()
struct context context; // swtch() here to ...
};
kernel/proc.c
在进程创建页表之时,将物理页面映射至USYSCALL处
// Build the user page table for process p (excerpt; earlier steps elided).
// Returns the page table, or 0 if one of the mappings below fails.
pagetable_t
proc_pagetable(struct proc *p) {
...
// map the trapframe just below TRAMPOLINE, for trampoline.S.
if (mappages(pagetable, TRAPFRAME, PGSIZE,
(uint64) (p->trapframe), PTE_R | PTE_W) < 0) {
uvmunmap(pagetable, TRAMPOLINE, 1, 0);
uvmfree(pagetable, 0);
return 0;
}
// Map the process's usyscall page at USYSCALL. The permissions are
// PTE_R | PTE_U with no PTE_W, so user code may read the page but not
// write it.
if (mappages(pagetable, USYSCALL, PGSIZE,
(uint64) (p->usyscall), PTE_R | PTE_U) < 0) {
// On failure, remove the mappings installed so far before freeing
// the page table.
uvmunmap(pagetable, TRAPFRAME, 1, 0);
uvmunmap(pagetable, TRAMPOLINE, 1, 0);
uvmfree(pagetable, 0);
return 0;
}
return pagetable;
}
kernel/proc.c
创建进程时,开辟并初始化共享页面,并将pid保存至共享页面
// Set up a new process slot (excerpt; parts elided). Returns the proc,
// or 0 if a page allocation fails, in which case the slot is torn down
// with freeproc() and p->lock is released.
static struct proc *
allocproc(void) {
...
// Allocate a trapframe page.
if ((p->trapframe = (struct trapframe *) kalloc()) == 0) {
freeproc(p);
release(&p->lock);
return 0;
}
// Allocate the page that will be shared with user space at USYSCALL.
if ((p->usyscall = (struct usyscall *) kalloc()) == 0) {
freeproc(p);
release(&p->lock);
return 0;
}
...
memset(&p->context, 0, sizeof(p->context));
p->context.ra = (uint64) forkret;
p->context.sp = p->kstack + PGSIZE;
// Publish the pid in the shared page so user code can read it there.
p->usyscall->pid = p->pid;
return p;
}
kernel/proc.c
释放进程时,释放共享页面
// Release the per-process pages held by p (excerpt; the rest is elided).
static void
freeproc(struct proc *p) {
if (p->trapframe)
kfree((void *) p->trapframe);
p->trapframe = 0;
// Free the usyscall shared page, if one was allocated, and clear the
// pointer so it is not freed twice.
if (p->usyscall) {
kfree((void *) p->usyscall);
}
p->usyscall = 0;
...
}
kernel/proc.c
释放进程页表时,移除相应页面(uvmfree函数必须要求移除pagetable中所有叶子节点,否则进入panic状态)
// Tear down a process's page table. The three fixed single-page mappings
// (trampoline, trapframe, usyscall) are removed first, each with 0 as
// uvmunmap()'s last argument; the trapframe and usyscall pages themselves
// are released by freeproc(). Only then is uvmfree() called for the
// remaining `sz` bytes of user memory.
void
proc_freepagetable(pagetable_t pagetable, uint64 sz)
{
  static const uint64 fixed_va[] = { TRAMPOLINE, TRAPFRAME, USYSCALL };
  const int nfixed = sizeof(fixed_va) / sizeof(fixed_va[0]);

  for (int k = 0; k < nfixed; k++) {
    uvmunmap(pagetable, fixed_va[k], 1, 0);
  }

  uvmfree(pagetable, sz);
}
Print a page table (easy)
To help you visualize RISC-V page tables, and perhaps to aid future debugging, your second task is to write a function that prints the contents of a page table.
Define a function called vmprint()
. It should take a pagetable_t
argument, and print that pagetable in the format described below. Insert if(p->pid==1) vmprint(p->pagetable)
in exec.c just before the return argc
, to print the first process's page table. You receive full credit for this part of the lab if you pass the pte printout
test of make grade
.
Now when you start xv6 it should print output like this, describing the page table of the first process at the point when it has just finished exec()
ing init
:
page table 0x0000000087f6e000
..0: pte 0x0000000021fda801 pa 0x0000000087f6a000
.. ..0: pte 0x0000000021fda401 pa 0x0000000087f69000
.. .. ..0: pte 0x0000000021fdac1f pa 0x0000000087f6b000
.. .. ..1: pte 0x0000000021fda00f pa 0x0000000087f68000
.. .. ..2: pte 0x0000000021fd9c1f pa 0x0000000087f67000
..255: pte 0x0000000021fdb401 pa 0x0000000087f6d000
.. ..511: pte 0x0000000021fdb001 pa 0x0000000087f6c000
.. .. ..509: pte 0x0000000021fdd813 pa 0x0000000087f76000
.. .. ..510: pte 0x0000000021fddc07 pa 0x0000000087f77000
.. .. ..511: pte 0x0000000020001c0b pa 0x0000000080007000
The first line displays the argument to vmprint
. After that there is a line for each PTE, including PTEs that refer to page-table pages deeper in the tree. Each PTE line is indented by a number of " .."
that indicates its depth in the tree. Each PTE line shows the PTE index in its page-table page, the pte bits, and the physical address extracted from the PTE. Don't print PTEs that are not valid. In the above example, the top-level page-table page has mappings for entries 0 and 255. The next level down for entry 0 has only index 0 mapped, and the bottom-level for that index 0 has entries 0, 1, and 2 mapped.
Your code might emit different physical addresses than those shown above. The number of entries and the virtual addresses should be the same.
Some hints:
- You can put vmprint() in kernel/vm.c.
- Use the macros at the end of the file kernel/riscv.h.
- The function freewalk may be inspirational.
- Define the prototype for vmprint in kernel/defs.h so that you can call it from exec.c.
- Use %p in your printf calls to print out full 64-bit hex PTEs and addresses as shown in the example.
题目要求打印pagetable信息
kernel/defs.h
定义函数头
... uartgetc(void);
// vm.c
..
// prototype for the page-table printer, so exec.c can call it
void vmprint(pagetable_t);
// plic.c
void plicinit(void);
...
kernel/exec.c
当执行exec且进程pid为1时,打印页表
// exec() excerpt: only the tail of the success path and the final
// failure return are shown; everything else is elided.
int
exec(char *path, char **argv)
{
...
proc_freepagetable(oldpagetable, oldsz);
// For the process with pid 1, print its page table before returning.
if(p->pid == 1){
vmprint(p->pagetable);
}
return argc; // this ends up in a0, the first argument to main(argc, argv)
...
return -1;
}
kernel/vm.c
打印页表
// Recursively print every valid PTE reachable from the page-table page
// `root`, one line per PTE, each line prefixed by one " .." per level.
//
//   paths - unused; kept only so the signature stays the same for
//           existing callers.
//   root  - the page-table page to scan.
//   cnt   - depth of `root` in the tree: 0 for the top-level page,
//           2 for a bottom-level page.
//
// Each PTE is printed at the moment it is visited. An earlier version
// postponed printing the two upper-level entries until a bottom-level
// page was entered, which re-printed the same ancestor line once for
// every additional bottom-level page under it and printed leaf PTEs
// found at upper levels with the wrong indentation.
void
vmprintwalk(uint64 paths[2][3], pagetable_t root, int cnt)
{
  (void) paths;

  // there are 2^9 = 512 PTEs in a page table.
  for (int i = 0; i < 512; i++) {
    pte_t pte = root[i];

    // invalid entries are never printed
    if ((pte & PTE_V) == 0) {
      continue;
    }

    uint64 child = PTE2PA(pte);

    // indentation: one " .." for depth 0, two for depth 1, three for depth 2
    for (int d = 0; d <= cnt; d++) {
      printf(" ..");
    }
    printf("%d: pte %p pa %p\n", i, pte, child);

    // A valid PTE with none of R/W/X set points to a lower-level page
    // table. Descend into it, but never below the bottom level.
    if ((pte & (PTE_R | PTE_W | PTE_X)) == 0 && cnt < 2) {
      vmprintwalk(paths, (pagetable_t) child, cnt + 1);
    }
  }
}
// Print a page table: first a header line with the address of `root`,
// then the PTE lines produced by vmprintwalk() starting at depth 0.
void
vmprint(pagetable_t root)
{
  uint64 ancestors[2][3];  // scratch storage passed to vmprintwalk()

  printf("page table %p\n", root);
  vmprintwalk(ancestors, root, 0);
}
Detecting which pages have been accessed (hard)
Some garbage collectors (a form of automatic memory management) can benefit from information about which pages have been accessed (read or write). In this part of the lab, you will add a new feature to xv6 that detects and reports this information to userspace by inspecting the access bits in the RISC-V page table. The RISC-V hardware page walker marks these bits in the PTE whenever it resolves a TLB miss.
Your job is to implement pgaccess()
, a system call that reports which pages have been accessed. The system call takes three arguments. First, it takes the starting virtual address of the first user page to check. Second, it takes the number of pages to check. Finally, it takes a user address to a buffer to store the results into a bitmask (a datastructure that uses one bit per page and where the first page corresponds to the least significant bit). You will receive full credit for this part of the lab if the pgaccess
test case passes when running pgtbltest
.
Some hints:
- Start by implementing sys_pgaccess() in kernel/sysproc.c.
- You'll need to parse arguments using argaddr() and argint().
- For the output bitmask, it's easier to store a temporary buffer in the kernel and copy it to the user (via copyout()) after filling it with the right bits.
- It's okay to set an upper limit on the number of pages that can be scanned.
- walk() in kernel/vm.c is very useful for finding the right PTEs.
- You'll need to define PTE_A, the access bit, in kernel/riscv.h. Consult the RISC-V manual to determine its value.
- Be sure to clear PTE_A after checking if it is set. Otherwise, it won't be possible to determine if the page was accessed since the last time pgaccess() was called (i.e., the bit will be set forever).
- vmprint() may come in handy to debug page tables.
题目要求给定一段连续的page,搜集这些page哪些被访问过,将结果以掩码的形式返回给用户
kernel/riscv.h
#define PTE_V (1L << 0) // valid
#define PTE_R (1L << 1) // readable
#define PTE_W (1L << 2) // writable
#define PTE_X (1L << 3) // executable
#define PTE_U (1L << 4) // 1 -> user can access
#define PTE_A (1L << 6) // 1 -> page has been accessed
kernel/sysproc.c
// Declare walk() (implemented in kernel/vm.c) so sys_pgaccess() can call it.
extern pte_t *walk(pagetable_t pagetable, uint64 va, int alloc);
#ifdef LAB_PGTBL
int
sys_pgaccess(void)
{
// lab pgtbl: your code here.
uint64 va,ua;
int pnum;
//获取参数
if(argaddr(0,&va)<0||
argint(1,&pnum)<0||
argaddr(2,&ua)){
return -1;
}
//如果需要扫描的页大于PGSIZE*8 返回-1
if(pnum>PGSIZE*8){
return -1;
}
//开辟一页缓冲区,缓冲区大小为PGSIZE*8位,即扫描的页数不能大于该数
char* buf = kalloc();
//初始化缓冲区,很关键,kalloc()会用无效数据填充开辟的页面而不是0
memset(buf,0,PGSIZE);
//从page的第cnt位开始标记
int cnt = (pnum/8+((pnum%8)!=0))*8-pnum;
printf("cnt = %d\n",cnt);
//依次扫描页面
for (int i=0;i<pnum;i++,cnt++){
pte_t* p = walk(myproc()->pagetable,va+i*PGSIZE,0);
if(*p&PTE_A){
//缓冲区对应位置1,pte的PTE_A置0
buf[cnt/8] |= 1<<(cnt%8);
*p &= ~PTE_A;
}
}
//传递结果给用户
copyout(myproc()->pagetable,ua,buf,pnum);
//释放页面
kfree(buf);
return 0;
}
#endif
Q.E.D.