bigfile

Before starting I had always been a bit intimidated by file systems, and after finishing I still am; what this lab covers feels very basic.

My first instinct was recursion, but the pattern isn't completely regular, so I went with a plain iterative bmap.
In theory a lot of the duplicated code could be factored into helper functions, but the original implementation is far from pure, so I didn't dare touch it.

Implementation-wise, following the Chinese translation of the lab guide is basically enough: the key points are all laid out, and the diagram is very clear.

Design-wise, this is the first time I've seen inode and dinode kept separate; it had never occurred to me that you could split them into an in-memory cache and an on-disk version.
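
The split is easiest to see at the point where the copy happens: ilock() pulls the on-disk dinode into the cached in-memory inode the first time it is locked. Quoted from memory and lightly abridged:

// kernel/fs.c (from memory, abridged)
void
ilock(struct inode *ip)
{
  struct buf *bp;
  struct dinode *dip;

  acquiresleep(&ip->lock);

  if(ip->valid == 0){
    bp = bread(ip->dev, IBLOCK(ip->inum, sb));         // block holding this dinode
    dip = (struct dinode*)bp->data + ip->inum % IPB;   // locate it within the block
    ip->type = dip->type;                              // copy the disk fields into the cache
    ip->major = dip->major;
    ip->minor = dip->minor;
    ip->nlink = dip->nlink;
    ip->size = dip->size;
    memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
    brelse(bp);
    ip->valid = 1;
  }
}

Because of that memmove, the addrs arrays of inode and dinode have to change in lockstep, which is why both structs below get NDIRECT+2.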

.....panic: virtio_disk_intr status

After finishing I kept hitting this panic. It turned out these two lines had been copied from the singly-indirect case above without changing what they operate on, i.e. the + 1 was missing, and it took a long time to debug. The corrected lines:

    if((addr = ip->addrs[NDIRECT + 1]) == 0)
      ip->addrs[NDIRECT + 1] = addr = balloc(ip->dev);
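
For contrast, the broken copy presumably still indexed the singly-indirect slot (reconstructed from the description above, not the actual diff):

    if((addr = ip->addrs[NDIRECT]) == 0)        // wrong slot: this is the singly-indirect entry
      ip->addrs[NDIRECT] = addr = balloc(ip->dev);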

mkfs: mkfs/mkfs.c:85: main: Assertion `(BSIZE % sizeof(struct dinode)) == 0' failed.

Another problem hit during implementation. The assertion points at dinode: mkfs requires sizeof(struct dinode) to divide BSIZE evenly, and after lowering NDIRECT to 11 that only holds if addrs grows, so its definition has to become NDIRECT + 2.
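
The arithmetic: four shorts (8 bytes) plus the uint size (4 bytes) plus 13 uints of addrs (52 bytes) is 64 bytes, and 1024 % 64 == 0. If you wanted the invariant caught at compile time rather than by mkfs, a check like this would do (my own addition, not part of xv6):

// Hypothetical compile-time version of mkfs's runtime assertion,
// e.g. placed after struct dinode in kernel/fs.h.
_Static_assert(BSIZE % sizeof(struct dinode) == 0,
               "struct dinode must evenly divide a disk block");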

In the same place there is another macro named MAXFILE, which gives the number of data blocks a file in the current file system can hold. Take a look at it and think: when you add large-file support to the xv6 file system, does this macro also need to be changed to the corresponding value?

Of course it does: we're adding support for files bigger than the old MAXFILE, so how could it stay the same?
Especially once a grep shows the tests actually use it.
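
Inside the kernel, MAXFILE is also what caps writei, so leaving it alone would make writes fail long before the doubly-indirect blocks are ever reached (excerpt from kernel/fs.c, quoted from memory):

// kernel/fs.c
int
writei(struct inode *ip, int user_src, uint64 src, uint off, uint n)
{
  // ...
  if(off + n > MAXFILE*BSIZE)   // any write extending past MAXFILE blocks is rejected here
    return -1;
  // ...
}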


Implementation

// kernel/fs.h
#define NDIRECT 11
#define MAXFILE (NDIRECT + NINDIRECT + NINDIRECT * NINDIRECT)
 
// On-disk inode structure
struct dinode {
  short type;           // File type
  short major;          // Major device number (T_DEVICE only)
  short minor;          // Minor device number (T_DEVICE only)
  short nlink;          // Number of links to inode in file system
  uint size;            // Size of file (bytes)
  uint addrs[NDIRECT+2];   // Data block addresses
};
 
// kernel/file.h
// in-memory copy of an inode
struct inode {
  uint dev;           // Device number
  uint inum;          // Inode number
  int ref;            // Reference count
  struct sleeplock lock; // protects everything below here
  int valid;          // inode has been read from disk?
 
  short type;         // copy of disk inode
  short major;
  short minor;
  short nlink;
  uint size;
  uint addrs[NDIRECT+2];
};
 
 
// kernel/fs.c
static uint
bmap(struct inode *ip, uint bn)
{
  uint addr, *a;
  struct buf *bp;
 
  if(bn < NDIRECT){
    if((addr = ip->addrs[bn]) == 0)
      ip->addrs[bn] = addr = balloc(ip->dev);
    return addr;
  }
  bn -= NDIRECT;
 
  // first indirect
  if(bn < NINDIRECT){
    // Load indirect block, allocating if necessary.
    if((addr = ip->addrs[NDIRECT]) == 0)
      ip->addrs[NDIRECT] = addr = balloc(ip->dev);
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn]) == 0){
      a[bn] = addr = balloc(ip->dev);
      log_write(bp);
    }
    brelse(bp);
    return addr;
  }
  bn -= NINDIRECT;
 
  // second indirect
  if (bn < NINDIRECT * NINDIRECT) {
    // Load indirect block, allocating if necessary.
    if((addr = ip->addrs[NDIRECT + 1]) == 0)
      ip->addrs[NDIRECT + 1] = addr = balloc(ip->dev);
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn / NINDIRECT]) == 0){
      a[bn / NINDIRECT] = addr = balloc(ip->dev);
      log_write(bp);
    }
    brelse(bp);
 
    // Load second indirect block
    bn %= NINDIRECT;
    // No need to balloc again since that block was already allocated above
    bp = bread(ip->dev, addr);
    a = (uint*)bp->data;
    if((addr = a[bn]) == 0){
      a[bn] = addr = balloc(ip->dev);
      log_write(bp);
    }
    brelse(bp);
 
    return addr;
  }
 
  panic("bmap: out of range");
}
 
// Truncate inode (discard contents).
// Caller must hold ip->lock.
void
itrunc(struct inode *ip)
{
  int i, j;
  struct buf *bp;
  uint *a;
 
  for(i = 0; i < NDIRECT; i++){
    if(ip->addrs[i]){
      bfree(ip->dev, ip->addrs[i]);
      ip->addrs[i] = 0;
    }
  }
 
  if(ip->addrs[NDIRECT]){
    bp = bread(ip->dev, ip->addrs[NDIRECT]);
    a = (uint*)bp->data;
    for(j = 0; j < NINDIRECT; j++){
      if(a[j])
        bfree(ip->dev, a[j]);
    }
    brelse(bp);
    bfree(ip->dev, ip->addrs[NDIRECT]);
    ip->addrs[NDIRECT] = 0;
  }
 
  // second indirect
  if (ip->addrs[NDIRECT + 1]) {
    bp = bread(ip->dev, ip->addrs[NDIRECT + 1]);
    a = (uint*)bp->data;
 
    for(i = 0; i < NINDIRECT; i++) {
      if(a[i]) {
        // Load second indirect block
        struct buf *bp2 = bread(ip->dev, a[i]);
        uint *a2 = (uint*)bp2->data;
 
        for(j = 0; j < NINDIRECT; j++){
          if(a2[j])
            bfree(ip->dev, a2[j]);
        }
 
        brelse(bp2);
        bfree(ip->dev, a[i]);
      }
    }
 
    brelse(bp);
    bfree(ip->dev, ip->addrs[NDIRECT + 1]);
    ip->addrs[NDIRECT + 1] = 0;
  }
 
  ip->size = 0;
  iupdate(ip);
}

Result:

$ bigfile
..................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
wrote 65803 blocks
bigfile done; ok
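
The block count lines up exactly with the new MAXFILE: 11 direct + 256 singly-indirect + 256 * 256 doubly-indirect = 65803 blocks (NINDIRECT is BSIZE / sizeof(uint) = 1024 / 4 = 256), so the doubly-indirect path gets exercised all the way to the end.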

symlink

A soft link occupies an inode just like a regular file does.

When implementing the syscall, I mainly looked back at what earlier labs added and where those pieces are used, and also referred to link and unlink.
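
The plumbing is the same set of registrations as in the earlier syscall labs; roughly the following, sketched from memory (the exact syscall number depends on your tree):

// user/user.h
int symlink(const char *target, const char *path);

// user/usys.pl
entry("symlink");

// kernel/syscall.h
#define SYS_symlink 22

// kernel/syscall.c
extern uint64 sys_symlink(void);
// ... plus an entry in the syscalls table: [SYS_symlink] sys_symlink,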

Chasing where the added defines are used leads to sys_open and create.

Seeing create, I could roughly guess the implementation: just create a file of type T_SYMLINK. Then, per the hint, write the target into its data block; there is no need to check that the target exists. Following the other implementations I added an ilock on ip, and only at the very end realized the inode is already locked outside (create returns it locked), so locking again inside deadlocks the tests. I only noticed this after reading implementations online.

open is what the name suggests: change it so that when it hits a symlink it follows whatever can be followed, with an upper bound on the depth.
While implementing it I accidentally took one lock too many (a cut-and-paste that didn't bring everything along) and deadlocked again, but gdb was enough to track that one down.

In general, deadlocks show up whether you are writing a file system or a DB; I should really spend some time learning how to deal with them properly.

I really miss golang's defer; it makes cleaning up resources so much easier.
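
The closest substitute in plain C is probably the single-exit goto cleanup idiom. Purely as an illustration (this is not the code I submitted, which follows right after), sys_symlink written in that shape:

uint64 sys_symlink(void) {
    char target[MAXPATH], path[MAXPATH];
    struct inode *ip;
    uint64 ret = -1;

    if (argstr(0, target, MAXPATH) < 0 || argstr(1, path, MAXPATH) < 0)
        return -1;

    begin_op();
    if ((ip = create(path, T_SYMLINK, 0, 0)) == 0)
        goto out;                          // nothing to unlock yet
    if (writei(ip, 0, (uint64)target, 0, strlen(target)) != strlen(target))
        goto unlock;
    ret = 0;
unlock:
    iunlockput(ip);                        // single cleanup point, defer-style
out:
    end_op();
    return ret;
}

The code I actually ended up with: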

// kernel/fcntl.h
#define O_NOFOLLOW 0x800
 
// kernel/sysfile.c
uint64 sys_symlink(void) {
    char target[MAXPATH], path[MAXPATH];
    struct inode *ip;
 
    if (argstr(0, target, MAXPATH) < 0 || argstr(1, path, MAXPATH) < 0)
        return -1;
 
    begin_op();
    ip = create(path, T_SYMLINK, 0, 0);
    if (ip == 0) {
        end_op();
        return -1;
    }
 
    // ilock(ip);   // not needed: create() returns ip already locked
    if (writei(ip, 0, (uint64)target, 0, strlen(target)) != strlen(target)) {
        iunlockput(ip);
        end_op();
        return -1;
    }
    iunlockput(ip);
    end_op();
 
    return 0;
}
 
uint64 sys_open(void)
{
    // ...
    begin_op();
    if (omode & O_CREATE) {
        ip = create(path, T_FILE, 0, 0);
        if (ip == 0) {
            end_op();
            return -1;
        }
    } else {
        int depth = 0;
        while (1) {
            if ((ip = namei(path)) == 0) {
                end_op();
                return -1;
            }

            ilock(ip);
            if ((ip->type == T_SYMLINK) && (omode & O_NOFOLLOW) == 0) { // a symlink we are allowed to follow
                if (++depth > 10) {      // cap the depth so symlink cycles terminate
                    iunlockput(ip);
                    end_op();
                    return -1;
                }
                if (readi(ip, 0, (uint64)path, 0, MAXPATH) < 0) {  // read the target path, then retry namei
                    iunlockput(ip);
                    end_op();
                    return -1;
                }
                iunlockput(ip);
            } else {
                break;                   // not a symlink (or O_NOFOLLOW): leave ip locked
            }
        }

        if (ip->type == T_DIR && omode != O_RDONLY) {
            iunlockput(ip);
            end_op();
            return -1;
        }
    }

    // ...

    iunlock(ip);
    end_op();

    return fd;
}

Result:

$ symlinktest
Start: test symlinks
test symlinks: ok
Start: test concurrent symlinks
test concurrent symlinks: ok

Summary

  • It's frighteningly easy to deadlock yourself in an instant; a defer-style cleanup like golang's would be very welcome
  • Function naming really matters; functions written by old-school unix hackers are impossible to read without comments, and only half understandable with them
  • Got more familiar with side effects vs. pure functions, and saw through a concrete example how FP affects testing and refactoring
  • The reliability of all these hard-coded constants is questionable; it's easy to change one and miss another
  • Got a little more familiar with the fs, especially the recursive following of symbolic links