xv6 Lab2: system calls
Lapin Gris Lv3

Lab2 实现 2 个 syscall。

Using gdb

第一部分是了解使用 gdb 调试 xv6(我使用的是 Debian)

# 其中一个窗口启动 qemu-gdb
cd xv6-labs-2023
make qemu-gdb


# 新开另一个窗口,连接 gdb
cd xv6-labs-2023
gdb-multiarch

xv6 项目,为我们提供了便捷的 .gdbinit 脚本,默认情况下(下图), GDB 好像不为我们自动加载这个脚本,
image.png

有两种方式可以让图中的提示消失,
第一种是将 xv6 的 .gdbinit 文件设置为 auto-load

mkdir -p /root/.config/gdb
echo "add-auto-load-safe-path /root/xv6-labs-2023/.gdbinit" > /root/.config/gdb/gdbinit

第二种是直接执行 source .gdbinit 手动加载,第一种设置 auto-load 的方式一劳永逸,下次 gdb 的时候,不用敲 source 了。

image.png

下面查看一下.gdbinit 里写了什么,打开 .gdbinit 发现它会帮我们,

  • 设置远端机器的架构 set architecture
  • 自动连接远端机器 target remote
  • 加载调试符号等操作 symbol-file
  • 其他

这些配置对于 GDB 远端调试都是必要的。特别解释一下 set architecture 的原因:riscv 架构的机器目前还不普及,我们的开发机通常是 x86/aarch64 架构,而被调试的 xv6 运行在 QEMU 模拟的 riscv64 上,存在跨架构调试的问题,所以需要显式告诉 GDB 目标机器的架构。

set confirm off
set architecture riscv:rv64
target remote 127.0.0.1:1234
symbol-file kernel/kernel
set disassemble-next-line auto
set riscv use-compressed-breakpoints yes

System call tracing

如何创建一个 syscall

user/user.h 中添加系统调用定义
user/usys.pl 中定义系统调用的入口,usys.pl 是一个脚本,会为所有的系统调用 entry 生成对应的汇编文件 usys.S:将系统调用号存到 a7 寄存器,然后调用 ecall 指令进行系统调用,接着返回

# User-space entry stub for the trace system call.
.global trace
trace:
# Put the syscall number in a7; the kernel's syscall() reads it from there.
li a7, SYS_trace
# Trap into the kernel to perform the system call.
ecall
# Back in user space: return to the caller of trace().
ret

kernel/syscall.h 中新增一个新的系统调用号。
kernel/syscall.c 中 syscalls 数组新增 syscall 的响应函数,会在 syscall() 函数中以函数指针的方式被使用。
kernel/sysproc.c 中新增对应 syscall 的实际响应函数,以 sys_ 开头。sys_ 开头的函数是个包装函数,先从对应的寄存器中读取系统调用的参数,然后调用实际的函数完成系统调用。

实现原理

先在 struct proc (代表进程 process) 中添加 tracemask 字段,代表当前进程需要跟踪的 syscall。

diff --git a/kernel/proc.h b/kernel/proc.h
index d021857..028a13a 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -91,6 +91,7 @@ struct proc {
int killed; // If non-zero, have been killed
int xstate; // Exit status to be returned to parent's wait
int pid; // Process ID
+ int tracemask; // trace mask syscall ID

当进程发起 syscall 时,会检测当前进程的 tracemask 中该 syscall 编号对应的位是否被置位。置位则代表该 syscall 被 trace,输出发起 syscall 的进程的 pid、syscall 名称以及 syscall 返回值。

@@ -139,6 +167,9 @@ syscall(void)
// Use num to lookup the system call function for num, call it,
// and store its return value in p->trapframe->a0
p->trapframe->a0 = syscalls[num]();
+ if(p->tracemask & (1 << num)) {
+ printf("%d: syscall %s -> %d\n", p->pid, syscall_name[num], p->trapframe->a0);
+ }
} else {
printf("%d %s: unknown sys call %d\n",
p->pid, p->name, num);

调用路径

用户态同名函数 trace -> syscall -> sys_trace -> 内核态同名函数 trace

  • trace(userspace):发起 syscall
  • syscall:系统调用函数,通过 syscalls 数组中定义的函数指针,根据系统调用号获得对应的函数指针,调用对应的 sys_ 函数
  • sys_trace:包装函数,先通过寄存器读取用户态传入的系统调用参数,接着调用实际的 trace()
  • trace(kernelspace):将当前进程的 tracemask 设置为用户态传入的掩码,掩码中第 n 位置位表示追踪系统调用号为 n 的 syscall。
// Syscall wrapper for trace: fetches syscall argument 0 as an int and
// hands it to trace(). Always returns 0.
uint64
sys_trace(void) {
// Despite the name, this value is not a single syscall number: trace()
// stores it as the process's tracemask, and syscall() tests individual
// bits of that mask with (1 << num).
int syscall_id;

argint(0, &syscall_id);
trace(syscall_id);
return 0;
}
// Kernel side of the trace syscall.
// Overwrites the tracemask of the process returned by myproc() with
// syscall_id. The value is stored as-is; it is not OR-ed into the
// previous mask. Always returns 0.
int
trace(int syscall_id)
{
struct proc *p = myproc();

p->tracemask = syscall_id;

return 0;
}
// Dispatch the system call requested by the current process.
// The syscall number is read from the saved a7 register and used to pick
// a handler from the syscalls[] table; the handler's return value is
// stored in the saved a0 register, which is what user space sees as the
// result. An invalid number yields -1.
void
syscall(void)
{
  int num;
  struct proc *p = myproc();

  num = p->trapframe->a7;
  // a7 is supplied by user code, so it must be validated before it is
  // used as an index: an out-of-range value or an unpopulated slot would
  // otherwise make the kernel jump through a bogus function pointer.
  if(num > 0 && num < sizeof(syscalls) / sizeof(syscalls[0]) && syscalls[num]) {
    p->trapframe->a0 = syscalls[num]();
  } else {
    printf("%d %s: unknown sys call %d\n",
           p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}

tracemask 变量初始化、销毁

struct proc 新增了 tracemask 字段,需要正确初始化以及销毁,fork 也需要将该字段复制到子进程里才能符合 fork 的语义。

diff --git a/kernel/proc.c b/kernel/proc.c
index 58a8a0b..c646f82 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -163,6 +163,7 @@ freeproc(struct proc *p)
p->pagetable = 0;
p->sz = 0;
p->pid = 0;
+ p->tracemask = 0;
p->parent = 0;
p->name[0] = 0;
p->chan = 0;
@@ -311,6 +312,7 @@ fork(void)
safestrcpy(np->name, p->name, sizeof(p->name));

pid = np->pid;
+ np->tracemask = p->tracemask;

release(&np->lock);

Show me the code

Talk is cheap. Show me the code.

diff --git a/Makefile b/Makefile
index 365c91b..ccf335b 100644
--- a/Makefile
+++ b/Makefile
@@ -188,6 +188,7 @@ UPROGS=\
$U/_grind\
$U/_wc\
$U/_zombie\
+ $U/_trace\



diff --git a/kernel/defs.h b/kernel/defs.h
index a3c962b..9d10bf3 100644
--- a/kernel/defs.h
+++ b/kernel/defs.h
@@ -106,6 +106,7 @@ void yield(void);
int either_copyout(int user_dst, uint64 dst, void *src, uint64 len);
int either_copyin(void *dst, int user_src, uint64 src, uint64 len);
void procdump(void);
+int trace(int);

// swtch.S
void swtch(struct context*, struct context*);
diff --git a/kernel/proc.c b/kernel/proc.c
index 58a8a0b..c646f82 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -163,6 +163,7 @@ freeproc(struct proc *p)
p->pagetable = 0;
p->sz = 0;
p->pid = 0;
+ p->tracemask = 0;
p->parent = 0;
p->name[0] = 0;
p->chan = 0;
@@ -311,6 +312,7 @@ fork(void)
safestrcpy(np->name, p->name, sizeof(p->name));

pid = np->pid;
+ np->tracemask = p->tracemask;

release(&np->lock);

@@ -627,6 +629,19 @@ killed(struct proc *p)
return k;
}

+
+// trace syscall
+int
+trace(int syscall_id)
+{
+ struct proc *p = myproc();
+
+ p->tracemask = syscall_id;
+
+ return 0;
+}
+
+
// Copy to either a user address, or kernel address,
// depending on usr_dst.
// Returns 0 on success, -1 on error.
diff --git a/kernel/proc.h b/kernel/proc.h
index d021857..028a13a 100644
--- a/kernel/proc.h
+++ b/kernel/proc.h
@@ -91,6 +91,7 @@ struct proc {
int killed; // If non-zero, have been killed
int xstate; // Exit status to be returned to parent's wait
int pid; // Process ID
+ int tracemask; // trace mask syscall ID

// wait_lock must be held when using this:
struct proc *parent; // Parent process
diff --git a/kernel/syscall.c b/kernel/syscall.c
index ed65409..bf84a9c 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -101,6 +101,7 @@ extern uint64 sys_unlink(void);
extern uint64 sys_link(void);
extern uint64 sys_mkdir(void);
extern uint64 sys_close(void);
+extern uint64 sys_trace(void);

// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
@@ -126,6 +127,33 @@ static uint64 (*syscalls[])(void) = {
[SYS_link] sys_link,
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
+[SYS_trace] sys_trace,
+};
+
+// Array of syscall name
+static char *syscall_name[]= {
+[SYS_fork] "fork",
+[SYS_exit] "exit",
+[SYS_wait] "wait",
+[SYS_pipe] "pipe",
+[SYS_read] "read",
+[SYS_kill] "kill",
+[SYS_exec] "exec",
+[SYS_fstat] "fstat",
+[SYS_chdir] "chdir",
+[SYS_dup] "dup",
+[SYS_getpid] "getpid",
+[SYS_sbrk] "sbrk",
+[SYS_sleep] "sleep",
+[SYS_uptime] "uptime",
+[SYS_open] "open",
+[SYS_write] "write",
+[SYS_mknod] "mknod",
+[SYS_unlink] "unlink",
+[SYS_link] "link",
+[SYS_mkdir] "mkdir",
+[SYS_close] "close",
+[SYS_trace] "trace",
};

void
@@ -139,6 +167,9 @@ syscall(void)
// Use num to lookup the system call function for num, call it,
// and store its return value in p->trapframe->a0
p->trapframe->a0 = syscalls[num]();
+ if(p->tracemask & (1 << num)) {
+ printf("%d: syscall %s -> %d\n", p->pid, syscall_name[num], p->trapframe->a0);
+ }
} else {
printf("%d %s: unknown sys call %d\n",
p->pid, p->name, num);
diff --git a/kernel/syscall.h b/kernel/syscall.h
index bc5f356..cc112b9 100644
--- a/kernel/syscall.h
+++ b/kernel/syscall.h
@@ -20,3 +20,4 @@
#define SYS_link 19
#define SYS_mkdir 20
#define SYS_close 21
+#define SYS_trace 22
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
index 3b4d5bd..2804c7d 100644
--- a/kernel/sysproc.c
+++ b/kernel/sysproc.c
@@ -91,3 +91,12 @@ sys_uptime(void)
release(&tickslock);
return xticks;
}
+
+uint64
+sys_trace(void) {
+ int syscall_id;
+
+ argint(0, &syscall_id);
+ trace(syscall_id);
+ return 0;
+}
diff --git a/user/user.h b/user/user.h
index 4d398d5..0bf4333 100644
--- a/user/user.h
+++ b/user/user.h
@@ -22,6 +22,7 @@ int getpid(void);
char* sbrk(int);
int sleep(int);
int uptime(void);
+int trace(int);

// ulib.c
int stat(const char*, struct stat*);
diff --git a/user/usys.pl b/user/usys.pl
index 01e426e..9c97b05 100755
--- a/user/usys.pl
+++ b/user/usys.pl
@@ -36,3 +36,4 @@ sub entry {
entry("sbrk");
entry("sleep");
entry("uptime");
+entry("trace");

思考

实现完 trace syscall 后,总觉得好像少点什么,不完整。仔细想想,我们实现的 trace 只能 trace 进程本身以及子进程?
Q: 那我们在现有的基础设施上,能否实现 trace 别的进程,也就是说给 trace 命令添加一个 -p 参数,能 trace 别的进程?
A: 实际上是可以的,而且在现有的基础设施上支持 -p 参数工作量很小,只需要重新实现 trace,将其中 myproc() 替换为遍历所有的进程,找到指定 pid 的进程并设置它的 tracemask 即可(修改其他进程的字段时需要持有该进程的 p->lock)。

Sysinfo

实现一个 sysinfo 系统调用,获取 xv6 内核的空闲内存以及当前进程数,将数据从内核态拷贝到用户态。

sysinfo 系统调用

有两点需要注意,
第一点是向用户态拷贝数据时候,只需要将结构体的地址传进来,内核从首地址往后写数据,用户态收到数据后根据定义的结构体解析数据。所以,只需要一个地址 addr 就行了。
第二点是返回值,当 copyout 失败的时候,需要返回 -1。sysinfotest.c 里测试用例会构造一个错误的 addr 作为 sysinfo 的参数传进来,此时 sysinfo 会失败,测试用例会对比返回值是不是 0xffffffffffffffff ,也就是 -1 。这里涉及的是 C 语言负数的存储方式,补码。

// Kernel side of the sysinfo syscall.
// Fills a struct sysinfo with the results of nr_freemem() and
// nr_processes(), then copies it to the user address addr through the
// page table of the process returned by myproc().
// Returns 0 on success, or -1 if copyout() returns a negative value
// (e.g. when addr is not a valid user address).
int
sysinfo(uint64 addr)
{
int ret;
struct sysinfo info;
struct proc *p = myproc();

info.freemem = nr_freemem();
info.nproc = nr_processes();

ret = copyout(p->pagetable, addr, (char *)&info, sizeof(info));

// Normalize any negative copyout() result to exactly -1.
return ret < 0 ? -1 : 0;
}

nr_freemem 和 nr_processes 两个函数的声明写在 kernel/defs.h 就行了,xv6 内核态的函数声明都放在此文件里。

获取空闲内存

xv6 的空闲内存是使用链表管理的,将指针 r 指向 kmem.freelist 即空闲链表的头部,沿着 next 遍历直到 r 为 NULL,期间访问的节点总数即空闲页的个数,再乘上每页的大小 PGSIZE 可得空闲内存容量。

// Returns the amount of free memory in bytes: the number of nodes on
// kmem.freelist multiplied by PGSIZE.
// NOTE(review): the list is walked without taking any lock here. If
// kmem.freelist can be modified concurrently (e.g. by kalloc/kfree on
// another CPU), this walk presumably needs to hold the allocator's
// lock — confirm.
int nr_freemem(void) {
struct run *r;
uint64 amount = 0;
r = kmem.freelist;
// Count one per list node until the end of the list.
while(r) {
r= r->next;
amount +=1;
}

// The uint64 product is narrowed to int by the return type.
return amount * PGSIZE;
}

获取进程数

xv6 内核进程使用全局维护的 proc 数组来表示,遍历它即可。

// Returns the number of entries in the global proc[] table whose state
// is not UNUSED.
// NOTE(review): p->state is read without holding any lock here — confirm
// that a possibly slightly stale count is acceptable for callers.
int nr_processes(void) {
struct proc *p;
int nr_p = 0;

// Scan all NPROC slots of the process table.
for(p = proc; p < &proc[NPROC]; p++) {
if(p->state != UNUSED)
nr_p++;
}

return nr_p;
}

Show me the code

Talk is cheap. Show me the code.

diff --git a/Makefile b/Makefile
index ccf335b..0e12e1e 100644
--- a/Makefile
+++ b/Makefile
@@ -189,6 +189,7 @@ UPROGS=\
$U/_wc\
$U/_zombie\
$U/_trace\
+ $U/_sysinfotest\



diff --git a/kernel/defs.h b/kernel/defs.h
index 9d10bf3..5e2867a 100644
--- a/kernel/defs.h
+++ b/kernel/defs.h
@@ -63,6 +63,7 @@ void ramdiskrw(struct buf*);
void* kalloc(void);
void kfree(void *);
void kinit(void);
+int nr_freemem(void);

// log.c
void initlog(int, struct superblock*);
@@ -107,6 +108,8 @@ int either_copyout(int user_dst, uint64 dst, void *src, uint64 len);
int either_copyin(void *dst, int user_src, uint64 src, uint64 len);
void procdump(void);
int trace(int);
+int sysinfo(uint64);
+int nr_processes(void);

// swtch.S
void swtch(struct context*, struct context*);
diff --git a/kernel/kalloc.c b/kernel/kalloc.c
index 0699e7e..c953886 100644
--- a/kernel/kalloc.c
+++ b/kernel/kalloc.c
@@ -80,3 +80,16 @@ kalloc(void)
memset((char*)r, 5, PGSIZE); // fill with junk
return (void*)r;
}
+
+int
+nr_freemem(void) {
+ struct run *r;
+ uint64 amount = 0;
+ r = kmem.freelist;
+ while(r) {
+ r= r->next;
+ amount +=1;
+ }
+
+ return amount * PGSIZE;
+}
diff --git a/kernel/proc.c b/kernel/proc.c
index c646f82..3ddb18c 100644
--- a/kernel/proc.c
+++ b/kernel/proc.c
@@ -5,6 +5,7 @@
#include "spinlock.h"
#include "proc.h"
#include "defs.h"
+#include "sysinfo.h"

struct cpu cpus[NCPU];

@@ -641,6 +642,22 @@ trace(int syscall_id)
return 0;
}

+// sysinfo syscall
+int
+sysinfo(uint64 addr)
+{
+ int ret;
+ struct sysinfo info;
+ struct proc *p = myproc();
+
+ info.freemem = nr_freemem();
+ info.nproc = nr_processes();
+
+ ret = copyout(p->pagetable, addr, (char *)&info, sizeof(info));
+
+ return ret < 0 ? -1 : 0;
+}
+

// Copy to either a user address, or kernel address,
// depending on usr_dst.
@@ -701,3 +718,16 @@ procdump(void)
printf("\n");
}
}
+
+int
+nr_processes(void) {
+ struct proc *p;
+ int nr_p = 0;
+
+ for(p = proc; p < &proc[NPROC]; p++) {
+ if(p->state != UNUSED)
+ nr_p++;
+ }
+
+ return nr_p;
+}
diff --git a/kernel/syscall.c b/kernel/syscall.c
index bf84a9c..726635a 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -102,6 +102,7 @@ extern uint64 sys_link(void);
extern uint64 sys_mkdir(void);
extern uint64 sys_close(void);
extern uint64 sys_trace(void);
+extern uint64 sys_sysinfo(void);

// An array mapping syscall numbers from syscall.h
// to the function that handles the system call.
@@ -128,6 +129,7 @@ static uint64 (*syscalls[])(void) = {
[SYS_mkdir] sys_mkdir,
[SYS_close] sys_close,
[SYS_trace] sys_trace,
+[SYS_sysinfo] sys_sysinfo,
};

// Array of syscall name
@@ -154,6 +156,7 @@ static char *syscall_name[]= {
[SYS_mkdir] "mkdir",
[SYS_close] "close",
[SYS_trace] "trace",
+[SYS_sysinfo] "sysinfo",
};

void
diff --git a/kernel/syscall.h b/kernel/syscall.h
index cc112b9..7961890 100644
--- a/kernel/syscall.h
+++ b/kernel/syscall.h
@@ -21,3 +21,4 @@
#define SYS_mkdir 20
#define SYS_close 21
#define SYS_trace 22
+#define SYS_sysinfo 23
diff --git a/kernel/sysproc.c b/kernel/sysproc.c
index 2804c7d..b36a4e0 100644
--- a/kernel/sysproc.c
+++ b/kernel/sysproc.c
@@ -100,3 +100,14 @@ sys_trace(void) {
trace(syscall_id);
return 0;
}
+
+uint64
+sys_sysinfo(void) {
+ uint64 addr;
+ int ret;
+
+ argaddr(0, &addr);
+ ret = sysinfo(addr);
+
+ return ret < 0 ? -1 : 0;
+}
diff --git a/user/user.h b/user/user.h
index 0bf4333..1645956 100644
--- a/user/user.h
+++ b/user/user.h
@@ -1,4 +1,5 @@
struct stat;
+struct sysinfo;

// system calls
int fork(void);
@@ -23,6 +24,7 @@ char* sbrk(int);
int sleep(int);
int uptime(void);
int trace(int);
+int sysinfo(struct sysinfo *);

// ulib.c
int stat(const char*, struct stat*);
diff --git a/user/usys.pl b/user/usys.pl
index 9c97b05..bc109fd 100755
--- a/user/usys.pl
+++ b/user/usys.pl
@@ -37,3 +37,4 @@ sub entry {
entry("sleep");
entry("uptime");
entry("trace");
+entry("sysinfo");

测试结果

最后 make grade 查看结果,

== Test answers-syscall.txt ==
answers-syscall.txt: OK
== Test trace 32 grep ==
$ make qemu-gdb
trace 32 grep: OK (3.0s)
== Test trace all grep ==
$ make qemu-gdb
trace all grep: OK (0.9s)
== Test trace nothing ==
$ make qemu-gdb
trace nothing: OK (0.8s)
== Test trace children ==
$ make qemu-gdb
trace children: OK (32.3s)
== Test sysinfotest ==
$ make qemu-gdb
sysinfotest: OK (3.6s)
== Test time ==
time: OK
Score: 40/40