roll-forwards : review and minor fixes / cleanups.

author NeilBrown <neilb@suse.de>

Mon, 11 Oct 2010 01:47:26 +0000 (12:47 +1100)

committer Neil Brown <neilb@nbeee.brown>

Mon, 11 Oct 2010 07:13:09 +0000 (18:13 +1100)
author NeilBrown <neilb@suse.de>
Mon, 11 Oct 2010 01:47:26 +0000 (12:47 +1100)
committer Neil Brown <neilb@nbeee.brown>
Mon, 11 Oct 2010 07:13:09 +0000 (18:13 +1100)
diff --git a/README b/README

index 21ee12fbed352333ba672d6765f928d74487e6d9..44150dfb4f67ab778b3af78ea76fe13916cd9367 100644 (file)
--- a/README
+++ b/README
@@ -5167,9 +5167,9 @@ DONE 15bi/ Set EmergencyClean a bit later - need at least one checkpoint first.
  
  15bn/ How does refcounting of 'struct fs' work with multiple filesets?
  
-15bo/ use put_super to drop last refer to superblocks
+DONE 15bo/ use put_super to drop last refer to superblocks
  
-15bp/ review all superblocks - maybe use more anon??
+DONE 15bp/ review all superblocks - maybe use more anon??
  
  15bq/ check readonly status in lafs_get_sb
  
@@ -5213,7 +5213,7 @@ DONE 15bi/ Set EmergencyClean a bit later - need at least one checkpoint first.
          - unusual conditions I want a warning of
          - data corruption errors
  
-15cf/ lafs_iget_fs need to sometimes to in-kernel mounts for subset filesystems
+DONE 15cf/ lafs_iget_fs need to sometimes to in-kernel mounts for subset filesystems
       This is needed for the cleaner - the cleaner needs to hold a ref somehow.
  
  15cg/ lafs_sync_inode is weird - why the lafs_checkpoint_start and update_cluster
@@ -5291,8 +5291,23 @@ DONE 15bi/ Set EmergencyClean a bit later - need at least one checkpoint first.
        - how to destroy
  
  36/ review roll-forward
-      make sure files with nlink == 0 are handled well
-      sanity check before trusting clusters
+
+36a/  make sure files with nlink == 0 are handled well
+DONE 36b/  sanity check before trusting clusters
+36c/ handle miniblocks which create new inodes.
+36d/ Handle DescHole in roll_block
+36e/ When dirtying a block in roll_block, maybe use writeback rather
+     than just iolock, for consistency...
+37f/ What to do if table becomes full when add_block_address in
+     roll_block ??
+37g/ Write roll_mini for directories.
+37h/ In roll_one, use the cluster counting code to find block number and
+     make sure we don't exceed the segment.
+37i/ add more general error checking to lafs_mount - 
+            lafs_iget orphans and segsum.  Check type is correct.
+         errors from lafs_count_orphans or lafs_add_orphans.
+         alloc_page failure for chead - maybe allocate something bigger??
+
  
  37/ Configure index block hash_table at run time base on mem size??
  
diff --git a/inode.c b/inode.c

index acc9261ea813c44719d0ac92e4be1f2c7e28f076..759365c59e17c76cc2f9235e98b75a5b79d4accd 100644 (file)
--- a/inode.c
+++ b/inode.c
@@ -304,6 +304,8 @@ lafs_import_inode(struct inode *ino, struct datablock *b)
                         = i->quota_inodes[2] = NULL;
                 nlen = li->metadata_size - offsetof(struct la_inode,
                                                     metadata[0].fs.name);
+               if (i->name)
+                       kfree(i->name);
                 if (nlen == 0)
                         i->name = NULL;
                 else {
diff --git a/roll.c b/roll.c

index b28e5e3d1df77a3dd31e35f36f3389cbb9c8a4ef..ca80cf73c82c636d0b6af548e634ab0d1020ca9d 100644 (file)
--- a/roll.c
+++ b/roll.c
@@ -1,11 +1,53 @@
  
  /*
   * fs/lafs/roll.c
- * Copyright (C) 2005-2009
+ * Copyright (C) 2005-2010
   * Neil Brown <neilb@suse.de>
   * Released under the GPL, version 2
   *
   * 'rollforward'
+ *
+ * This file handles mounting of a filesystem once the superblocks
+ * have been loaded.
+ * It loads the root inode (the root of the filesystem, not of the
+ * directory tree) and then handles roll-forward to pick up and changes
+ * there are not in the filesystem yet, either due to a crash, or because
+ * they cannot be consistently stored easily (final segusage/quota info).
+ *
+ * Roll-forward reads write-cluster header and handle things as appropriate.
+ * Data blocks are only processed if they belong to:
+ *   - regular files
+ *   - segusage files
+ *   - quota files.
+ * A data block in a regular file implies an extension of the file size
+ * to the end of the block, if it was previously at or before the start
+ * of the block.  Datablocks that were just moved for cleaning are
+ * ignored.
+ *
+ * Index blocks are always ignored - they need to be recalculated.
+ *
+ * 'miniblocks' or 'updates' are always processed - they represent an
+ * atomic update that might affect multiple files - those files for which
+ * data blocks are ignored.
+ * Updates are understood:
+ * - for inodes.  The update simply over-writes part of the inode metadata,
+ *       which could affect the link count or size.  Such inodes become
+ *       orphans in case truncation or deletion is needed.  This can create
+ *       an inode which might affect the inode usage map.
+ * - for directories.  The update identifies a name and an inode number.
+ *       This can imply a change to the inode's link count and again could
+ *       make it an orphan.  In some cases updates are paired, possibly across
+ *       different directories.  This is needed for 'rename'.
+ *
+ * Each write-cluster has three levels of validation.
+ *  Firstly, if the header is internally consistent, with correct tag,
+ *   uuid, and sequence, then we know a write was attempted, and anything that
+ *   must be written before that was successfully written.
+ *  Secondly, if the header has a correct checksum, then it is all correct,
+ *   and the miniblocks are valid.
+ *  Thirdly, if the next or next-but-one header (depending on verify_type) is
+ *   internally consistent, than we know that the data blocks in this cluster
+ *   were all written successfully.
   */
  
  #include       "lafs.h"
@@ -39,18 +81,23 @@ roll_valid(struct fs *fs, struct cluster_head *ch, unsigned long long addr)
  }
  
  /*
+ * roll_locate scopes out the full extent of the required roll-forward.
+ * It start at the start of the last checkpoint (recorded in the stateblock)
+ * and checks that the end of the checkpoint exists, and continues following
+ * the chain as far as valid cluster heads can be found.
   * roll_locate returns 0 if proper endpoints were found,
- * or -EINVAL?? if CheckpointStart and CheckpointEnd weren't found properly
+ * or -EIO if CheckpointStart and CheckpointEnd weren't found properly
   * "next" will contain the address of the next cluster to be written to,
   * "last" the cluster before that, and "seq" the seq number for next cluster
+ * "maxp" will be used to report the maximum size of a cluster head.
   */
  static int
  roll_locate(struct fs *fs, u64 start,
-           u64 *next, u64 *lastp, u64 *seqp,
+           u64 *nextp, u64 *lastp, u64 *seqp,
             int *maxp, struct page *p)
  {
         struct cluster_head *ch;
-       u64 this, prev, prev2, last;
+       u64 this, prev, prev2, last, next;
         u64 seq = 0;
         int max = 0;
         int prevtype, prev2type;
@@ -59,6 +106,9 @@ roll_locate(struct fs *fs, u64 start,
  
         this = start; prev = start;
  
+       /* First we walk through the checkpoint section, which should
+        * all be valid.
+        */
         do {
                 if (lafs_load_page(fs, p, this, 1) != 0) {
                         printk(KERN_ERR "LaFS: Could not read cluster %llu\n",
@@ -109,7 +159,7 @@ roll_locate(struct fs *fs, u64 start,
         /* 'seq' is sequence number of 'this' */
         dprintk("CheckpointEnd found at %llu, seq %llu\n", prev, seq-1);
  
-       /* now we need to step forward a bit more carefully. as any
+       /* now we need to step forward a bit more carefully, as any
          * cluster we find now could easily be bad.
          * We keep:
          *   this - address of cluster we are now considering
@@ -123,7 +173,7 @@ roll_locate(struct fs *fs, u64 start,
          */
  
         last = prev;
-       start = this;
+       next = this;
         prev2 = prev;
         prevtype = prev2type = VerifyNull;
  
@@ -136,7 +186,7 @@ roll_locate(struct fs *fs, u64 start,
                         break;
                 if (le64_to_cpu(ch->seq) != seq)
                         break;
-               /* FIXME check checksum, and possibly VerifySum */
+
                 /* this head looks valid, so we can possibly verify previous
                  * clusters
                  */
@@ -144,32 +194,35 @@ roll_locate(struct fs *fs, u64 start,
                         max = le16_to_cpu(ch->Hlength);
  
                 if (prev2type == VerifyNext2) {
-                       start = prev; last = prev2;
+                       next = prev;
+                       last = prev2;
                 }
                 if (prevtype == VerifyNext) {
-                       start = this;
+                       next = this;
                         last = prev;
                 }
  
                 /* shift prev info back */
-               prev2 = prev; prev2type = prevtype;
-               prev = this ; prevtype = le16_to_cpu(ch->verify_type);
+               prev2 = prev;
+               prev2type = prevtype;
+               prev = this;
+               prevtype = le16_to_cpu(ch->verify_type);
                 this = le64_to_cpu(ch->next_addr);
                 if (prevtype == VerifyNull) {
-                       start = this;
+                       next = this;
                         last = prev;
                 }
                 seq++;
         }
  
-       dprintk("LaFS: Next address to write is %llu\n", start);
-       *next = start;
+       dprintk("LaFS: Next address to write is %llu\n", next);
+       *nextp = next;
         *lastp = last;
-       if (start == this)
+       if (next == this)
                 *seqp = seq;
-       else if (start == prev)
+       else if (next == prev)
                 *seqp = seq-1;
-       else if (start == prev2)
+       else if (next == prev2)
                 *seqp = seq-2;
         else
                 BUG();
@@ -213,21 +266,37 @@ roll_mini(struct fs *fs, int fsnum, int inum, int trunc, int flg,
         li = LAFSI(inode);
  
         switch (li->type) {
+       default: /* Any unknown type is an error */
+               printk(KERN_WARNING "LAFS impossibly file type for roll-forward: %d\n",
+                      li->type);
+               err = -EIO;
+               break;
+
         case TypeInodeFile:
  
-               BUG_ON(fsnum); /* FIXME should be more careful */
+               if (fsnum) {
+                       printk(KERN_WARNING "LAFS: Ignoring impossible sub-subset\n");
+                       break;
+               }
+
                 lafs_iput_fs(inode);
                 inode = lafs_iget_fs(fs, inum, bnum, SYNC);
                 if (IS_ERR(inode)) {
                         err = PTR_ERR(inode);
-                       if (err == -EIO && offset == 0) {
-                               /* creating new inode */
-                       }
-                       return PTR_ERR(inode);
+                       if (err != -ENOENT || offset != 0)
+                               return err;
+
+                       /* FIXME creating new inode */
+                       BUG();
                 }
                 db = lafs_inode_dblock(inode, SYNC, MKREF(roll));
+               if (IS_ERR(db)) {
+                       err = PTR_ERR(db);
+                       break;
+               }
+               /* Make sure block is in-sync with inode */
+               lafs_inode_fillblock(inode);
                 buf = map_dblock(db);
-               /* FIXME do I sync the inode back to the datablock first? */
                 memcpy(buf+offset, data, len);
                 unmap_dblock(db, buf);
                 err = lafs_import_inode(inode, db);
@@ -243,6 +312,11 @@ roll_mini(struct fs *fs, int fsnum, int inum, int trunc, int flg,
                 }
                 putdref(db, MKREF(roll));
                 break;
+
+       case TypeDir:
+               /* Haven't written this yet FIXME */
+               BUG();
+               break;
         }
         lafs_iput_fs(inode);
         return err;
@@ -250,7 +324,7 @@ roll_mini(struct fs *fs, int fsnum, int inum, int trunc, int flg,
  
  static int __must_check
  roll_block(struct fs *fs, int fsnum, int inum, int trunc, int flg,
-          u32 bnum, u64 baddr, int type, struct page *p)
+          u32 bnum, u64 baddr, int bytes, struct page *p)
  {
         struct inode *inode;
         struct datablock *blk = NULL;
@@ -259,9 +333,9 @@ roll_block(struct fs *fs, int fsnum, int inum, int trunc, int flg,
  
         if (flg)
                 return 0; /* "old" blocks aren't interesting */
-       if (type == DescIndex)
+       if (bytes == DescIndex)
                 return 0; /* index blocks aren't interesting either */
-       if (type == DescHole)
+       if (bytes == DescHole)
                 return 0; /* FIXME should I punch a hole here? */
  
         dprintk("Roll Block %d/%d/%d/%lu/%llu\n",
@@ -273,8 +347,6 @@ roll_block(struct fs *fs, int fsnum, int inum, int trunc, int flg,
         if (IS_ERR(inode))
                 return PTR_ERR(inode);
  
-       /* FIXME do I need to 'lock' the inode in any way? */
-
         /* check type */
         li = LAFSI(inode);
  
@@ -342,12 +414,11 @@ roll_block(struct fs *fs, int fsnum, int inum, int trunc, int flg,
                         /* already correctly indexed */
                         break;
  
-               /* FIXME do I need to dirty the inode to flush
-                * this change into the datablock?
-                */
                 if (li->type >= TypeBase &&
-                   inode->i_size <= ((loff_t)bnum << inode->i_blkbits))
-                       inode->i_size = ((loff_t)bnum << inode->i_blkbits) + type;
+                   inode->i_size <= ((loff_t)bnum << inode->i_blkbits)) {
+                       inode->i_size = ((loff_t)bnum << inode->i_blkbits) + bytes;
+                       set_bit(I_Dirty, &LAFSI(inode)->iflags);
+               }
  
                 /* FIXME: we pretend this is a dirty, pinned block
                  * so the lower-level code doesn't get confused.
diff --git a/super.c b/super.c

index bdfbe1c08af23fb1fb14c582b9a51e5f3fb73bf9..5ec7029b262b6097387ba63a52cb74645ab938b6 100644 (file)
--- a/super.c
+++ b/super.c
@@ -1250,6 +1250,7 @@ static struct inode *lafs_alloc_inode(struct super_block *sb)
         li->iblock = NULL;
         li->dblock = NULL;
         li->update_cluster = 0;
+       li->md.fs.name = NULL;
  
         init_rwsem(&li->ind_sem);
         INIT_LIST_HEAD(&li->free_index);
author	NeilBrown <neilb@suse.de>
	Mon, 11 Oct 2010 01:47:26 +0000 (12:47 +1100)
committer	Neil Brown <neilb@nbeee.brown>
	Mon, 11 Oct 2010 07:13:09 +0000 (18:13 +1100)
README		patch \| blob \| history
inode.c		patch \| blob \| history
roll.c		patch \| blob \| history
super.c		patch \| blob \| history