System call tracing

低配 strace
还是很复杂的,很多 stub code 要修改,要严格按照 hint 里面的顺序,不然会乱
感觉可以搜 sys_ 然后看着改
要理解 mask

  • The user-space code for system calls is in user/user.h and user/usys.pl.
  • The kernel-space code is kernel/syscall.h, kernel/syscall.c.
  • The process-related code is kernel/proc.h and kernel/proc.c.
  • 先添加 kernel space 和 user space 的函数入口
// kernel/syscall.c
extern uint64 sys_trace(void);
 
// kernel/syscall.h
#define SYS_trace  22
 
// user/user.h
int trace(int);
 
// user/usys.pl
entry("trace");
  • 修改 proc 来储存要追踪的那些 syscall,注意 alignment,加到最后最好,这样比较无脑,xv6 理论上 32 位就够了,Linux 就不够:D。另外在新建 proc 时候最好 initialize 一下
// kernel/proc.h
// Per-process state
struct proc {
  ...
  
  // Bitmask selecting which system calls are traced for this process:
  // syscall() prints a trace line for syscall number n when bit n is set.
  // Written by sys_trace(), copied from parent to child in fork(), and
  // zeroed in allocproc().
  uint32 trace_mask;
};
 
// kernel/proc.c, initialize
// Process allocation (most of the body is elided in these notes). The only
// addition for the trace lab is resetting trace_mask before the proc is
// returned to the caller.
static struct proc *allocproc(void) {
  ...
 
  // Start with no system calls traced.
  p->trace_mask = 0;
 
  return p;
}
  • 实现,参照其他 syscall 即可。为了追踪 child 还要修改 fork
// kernel/sysproc.c
uint64 sys_trace(void) {
  int mask;
 
  if (argint(0, &mask) != 0) { // 用于获取syscall参数
    return -1;
  }
 
  myproc()->trace_mask = mask;
  return 0;
}
 
// kernel/proc.c
// fork() with the trace-lab addition (the rest of the body is elided in
// these notes): the child receives a copy of the parent's trace mask.
int fork(void) {
  int i, pid;
  struct proc *np;
  struct proc *p = myproc();
 
  // Allocate process.
  if ((np = allocproc()) == 0) {
    return -1;
  }
 
  // Inherit the parent's trace mask so that system calls made by the child
  // are traced with the same selection as the parent's.
  np->trace_mask = p->trace_mask;
 
  ...
}
  • 由于 syscall 函数是所有 syscall 的入口,在这里输出即可,用一个类 map 来实现索引并输出名字
// Printable name of each system call, indexed by syscall number. Used by
// syscall() when it prints a trace line.
static const char *syscall_names[] = {
    [SYS_fork] = "fork",
    [SYS_exit] = "exit",
    [SYS_wait] = "wait",
    [SYS_pipe] = "pipe",
    [SYS_read] = "read",
    [SYS_kill] = "kill",
    [SYS_exec] = "exec",
    [SYS_fstat] = "fstat",
    [SYS_chdir] = "chdir",
    [SYS_dup] = "dup",
    [SYS_getpid] = "getpid",
    [SYS_sbrk] = "sbrk",
    [SYS_sleep] = "sleep",
    [SYS_uptime] = "uptime",
    [SYS_open] = "open",
    [SYS_write] = "write",
    [SYS_mknod] = "mknod",
    [SYS_unlink] = "unlink",
    [SYS_link] = "link",
    [SYS_mkdir] = "mkdir",
    [SYS_close] = "close",
    [SYS_trace] = "trace",
};
 
// Entry point for every system call. Reads the syscall number from the
// saved a7 register, dispatches through the syscalls[] table, and stores
// the result in the saved a0 register. If the bit for this syscall number
// is set in the process's trace_mask, a line
//   "<pid>: syscall <name> -> <return value>"
// is printed after the call returns. Unknown syscall numbers are reported
// and yield -1.
void syscall(void) {
  struct proc *p = myproc();
  int num = p->trapframe->a7;

  if (num > 0 && num < NELEM(syscalls) && syscalls[num]) {
    p->trapframe->a0 = syscalls[num]();
    int pid = p->pid;
    int return_code = p->trapframe->a0;

    // Only shift by amounts smaller than the width of trace_mask; a larger
    // shift would be undefined behavior if the syscall table ever grows
    // past the number of bits in the mask.
    int mask_bits = (int)(8 * sizeof(p->trace_mask));
    if (num < mask_bits && ((p->trace_mask >> num) & 1)) {
      // The name table may be shorter than the dispatch table (a syscall
      // added without a name entry), so never index it unchecked.
      const char *name = "unknown";
      if (num < NELEM(syscall_names) && syscall_names[num]) {
        name = syscall_names[num];
      }
      printf("%d: syscall %s -> %d\n", pid, name, return_code);
    }
  } else {
    printf("%d %s: unknown syscall %d\n", p->pid, p->name, num);
    p->trapframe->a0 = -1;
  }
}
  • 之后成功得到输出
$ trace 32 grep hello README
3: syscall read -> 1023
3: syscall read -> 966
3: syscall read -> 70
3: syscall read -> 0
$
$ trace 2147483647 grep hello README
4: syscall trace -> 0
4: syscall exec -> 3
4: syscall open -> 3
4: syscall read -> 1023
4: syscall read -> 966
4: syscall read -> 70
4: syscall read -> 0
4: syscall close -> 0

sysinfo

一样的思路,不同的是

  1. 要知道如何获取 freemem 和 nproc (kalloc.c and proc.c)
  2. copy memory from kernel space to user space (copyout), since the page tables are different.

添加 syscall 的方式同上

// user/user.h
// make it compile
struct sysinfo;
int sysinfo(struct sysinfo*);
 
// user/usys.pl
entry("sysinfo");
 
// kernel/syscall.h
#define SYS_sysinfo  23
 
// kernel/defs.h
uint64 proc_count(void);
uint64 kfree_mem_count(void);
 
// kernel/kalloc.c
uint64 kfree_mem_count() {
  uint64 mem_bytes = 0;
  struct run *r;
  
  acquire(&kmem.lock);
  r = kmem.freelist;
  while (r) {
    mem_bytes += PGSIZE;
    r = r->next;
  }
  release(&kmem.lock);
 
  return mem_bytes;
}
 
// kernel/proc.c
// Returns the number of entries in the proc table whose state is not
// UNUSED. Each entry's state is read while holding that entry's lock.
uint64 proc_count() {
  uint64 in_use = 0;

  for (int i = 0; i < NPROC; i++) {
    struct proc *slot = &proc[i];

    acquire(&slot->lock);
    int active = (slot->state != UNUSED);
    release(&slot->lock);

    if (active) {
      in_use++;
    }
  }

  return in_use;
}
 
// kernel/syscall.c
extern uint64 sys_sysinfo(void);
 
// Dispatch table mapping each system call number to the kernel function
// that implements it.
static uint64 (*syscalls[])(void) = {
    [SYS_fork] = sys_fork,
    [SYS_exit] = sys_exit,
    [SYS_wait] = sys_wait,
    [SYS_pipe] = sys_pipe,
    [SYS_read] = sys_read,
    [SYS_kill] = sys_kill,
    [SYS_exec] = sys_exec,
    [SYS_fstat] = sys_fstat,
    [SYS_chdir] = sys_chdir,
    [SYS_dup] = sys_dup,
    [SYS_getpid] = sys_getpid,
    [SYS_sbrk] = sys_sbrk,
    [SYS_sleep] = sys_sleep,
    [SYS_uptime] = sys_uptime,
    [SYS_open] = sys_open,
    [SYS_write] = sys_write,
    [SYS_mknod] = sys_mknod,
    [SYS_unlink] = sys_unlink,
    [SYS_link] = sys_link,
    [SYS_mkdir] = sys_mkdir,
    [SYS_close] = sys_close,
    [SYS_trace] = sys_trace,
    [SYS_sysinfo] = sys_sysinfo,
};
 
uint64 sys_sysinfo() {
  uint64 addr;
  if (argaddr(0, &addr) < 0) {
    return -1;  // questionable -1 in unit but i guess it's intentional
  }
 
  struct sysinfo sinfo;
  sinfo.freemem = kfree_mem_count();
  sinfo.nproc = proc_count();
 
  if (copyout(myproc()->pagetable, addr, (char *)&sinfo, sizeof(sinfo)) < 0) {
    return -1;
  }
 
  return 0;
}

result:

$ sysinfotest
sysinfotest: start
sysinfotest: OK

autograde

== Test trace 32 grep == 
$ make qemu-gdb
trace 32 grep: OK (3.1s) 
== Test trace all grep == 
$ make qemu-gdb
trace all grep: OK (1.2s) 
== Test trace nothing == 
$ make qemu-gdb
trace nothing: OK (1.1s) 
== Test trace children == 
$ make qemu-gdb
trace children: OK (18.2s) 
== Test sysinfotest == 
$ make qemu-gdb
sysinfotest: OK (2.8s) 
== Test time == 
time: FAIL 
    Cannot read time.txt
Score: 34/35

感想

为了隔离真的好麻烦。。操作系统和 libc 真是抽象完了.
整体感觉难不是很难,但是很繁琐,还要对操作系统有个整体认识。