Oh well.
====================34 credits ==============================
df: tot=4608 free=4257 avail=3176(3373-197) cb=350 pb=0 ab=1
+
+It seems that the unaccounted blocks are (or can be) created by
+writing to a file then removing the file without a sync.
+..but why is cb (cblocks_used) so high?
+
+27th February 2009
+
+ Got onto a bit of a tangent...
+ What happens if we truncate a block while it is on a list to
+	be cleaned?  Clearly we want the cleaner to drop it ASAP.
+ But what if invalidate_page wants to drop it *now*
+ Hopefully it is either still on clean_leafs and we can remove it,
+ or it is now iolocked and we can wait for it. So should be OK.
+
+ I keep getting caught in "looping on..."
+ We are truncating an inode and some index block which is now empty
+ is not getting removed from the tree because there is an outstanding
+ reference.... 327/0 depth=1. I guess I turn on the tracing.
+
+ ... and it seems that it is in the process of checkpointing.
+ I guess I need to lock against that ... maybe with the iolock.
+
+Credits = -1, rv=2
+ib = [ce814e40]328/0(2552)r3:Index(1),Pinned,Phase1,Valid,Dirty,CI,CN,CNI,UninCredit,PhysValid,Uninc{0,0}[0]
+------------[ cut here ]------------
+kernel BUG at /home/neilb/work/nfsbrick/fs/module/segments.c:371!
+
+ -------
+ Every time I create/delete a file, I get an extra 'ab' which disappears
+ on 'sync'.
+ ablocks_used is:
+ decremented when +ve summary_update on non-index
+ increased on lafs_summary_allocate... should not be done for index blocks.
+
+ OK: after test run, filesystem is empty, but cblocks_used is around 360.
+ cblocks_used:
+ is loaded at mount time
+ collects pblocks_used on a phase flip
+ is updated in lafs_summary_update (unless pblocks is)
+ So we must be missing a lafs_summary_update when phys->0
+
+
+ Lots of problems:
+ truncating big (multi-level index) seems to be bad
+ Leaves 'pb-338 !!! and cb+689, even after sync.
+ still 'looping on' occasionally
+ Haven't found cblocks_used leak yet.
+    Occasionally non-B_Valid blocks are acted on.
+ I think I need to improve io locking.
+
+---------------
+1st March 2009
+ Need some improvements to iolock locking.
+ We use this lock to wait for a block to be written out (if that is happening)
+ before we allow lafs_invalidate_page to complete.n
+ It is also used in lafs_erase_{d,i}block (similar purpose)
+ We take the lock in lafs_cluster_allocate, and then make sure the block is
+ still dirty.
+
+ Also lock in lafs_new_inode as initing the inode is a form of IO ??
+ load_block takes the lock
+ We only clear_bit(B_Valid, ) under this lock.
+
+ So the issue is this:
+ A block that is going to be written is passed to lafs_cluster_allocate.
+ This happens either after taking it off a _leafs list, or when
+ lafs_writepage requests the write.
+
+ lafs_invalidate_page needs to be able to release the page, so there needs to
+ be no transient references. In particular, once the block has been
+ removed from a _leafs list it must already be iolocked.
+ Invalidate_page can then either remove from that list and erase the block,
+ or use io_lock_block to wait for the IO to complete.
+ So when a datablock comes out of get_flushable it must be iolocked, and must
+ remain iolocked until after Dirty and Alloc are clear
+ Index blocks belong entirely to the fs, so we can be more relaxed with them.
+ If get_flushable finds the block already iolocked, it is either being invalidated
+ or already has IO pending, so it can be dropped.
+
+
return youth;
}
-static inline struct block *get_flushable(struct fs *fs, int phase)
+/* Get a block that should be written out.
+ * If phase is '-1', then only return cleanable blocks, else
+ * also return blocks for that phase
+ */
+struct block *lafs_get_flushable(struct fs *fs, int phase)
{
struct block *b;
spin_lock(&fs->lock);
if (!list_empty(&fs->clean_leafs))
b = list_entry(fs->clean_leafs.next, struct block, lru);
- else if (!list_empty(&fs->phase_leafs[phase]))
+ else if (phase >= 0 && !list_empty(&fs->phase_leafs[phase]))
b = list_entry(fs->phase_leafs[phase].next, struct block, lru);
else
b = NULL;
dprintk("Start Checkpoint\n");
if (lafs_trace) lafs_dump_tree();
- while ( (b = get_flushable(fs, oldphase)) != NULL) {
+ while ( (b = lafs_get_flushable(fs, oldphase)) != NULL) {
dprintk("Block %d/%d idx=%d\n", (int)b->inode->i_ino, (int)b->fileaddr,
test_bit(B_Index, &b->flags));
*/
static void cleaner_flush(struct fs *fs)
{
+ struct block *b;
dprintk("Start cleaner_flush\n");
- spin_lock(&fs->lock);
- while (!list_empty(&fs->clean_leafs)) {
- char *strflags(struct block *b);
- struct block *b = list_entry(fs->clean_leafs.next,
- struct block, lru);
-
- list_del_init(&b->lru);
- /* We now own that reference */
- spin_unlock(&fs->lock);
- dprintk("cleaning %d/%d: %s\n", (int)b->inode->i_ino,
- (int)b->fileaddr, strflags(b));
+ while ( (b = lafs_get_flushable(fs, -1)) != NULL) {
+
+ dprintk("cleaning %s\n", strblk(b));
//printk("[");
if (test_bit(B_Dirty, &b->flags)) {
// FIXME wait??
lafs_cluster_wait_all(fs);
}
- spin_lock(&fs->lock);
//printk("\n");
}
- spin_unlock(&fs->lock);
}
+
static int try_clean(struct fs *fs, struct toclean *tc)
{
/* return 1 if everything has been found, -ve if we need to flush */