Oh well.
====================34 credits ==============================
df: tot=4608 free=4257 avail=3176(3373-197) cb=350 pb=0 ab=1
+
+It seems that the unaccounted blocks are (or can be) created by
+writing to a file then removing the file without a sync.
+..but why is cb (cblocks_used) so high?
+
+27th February 2009
+
+ Got onto a bit of a tangent...
+ What happens if we truncate a block while it is on a list to
+	be cleaned?  Clearly we want the cleaner to drop it ASAP.
+ But what if invalidate_page wants to drop it *now*
+ Hopefully it is either still on clean_leafs and we can remove it,
+ or it is now iolocked and we can wait for it. So should be OK.
+
+ I keep getting caught in "looping on..."
+ We are truncating an inode and some index block which is now empty
+ is not getting removed from the tree because there is an outstanding
+ reference.... 327/0 depth=1. I guess I turn on the tracing.
+
+ ... and it seems that it is in the process of checkpointing.
+ I guess I need to lock against that ... maybe with the iolock.
+
+Credits = -1, rv=2
+ib = [ce814e40]328/0(2552)r3:Index(1),Pinned,Phase1,Valid,Dirty,CI,CN,CNI,UninCredit,PhysValid,Uninc{0,0}[0]
+------------[ cut here ]------------
+kernel BUG at /home/neilb/work/nfsbrick/fs/module/segments.c:371!
+
+ -------
+ Every time I create/delete a file, I get an extra 'ab' which disappears
+ on 'sync'.
+ ablocks_used is:
+ decremented when +ve summary_update on non-index
+ increased on lafs_summary_allocate... should not be done for index blocks.
+
+ OK: after test run, filesystem is empty, but cblocks_used is around 360.
+ cblocks_used:
+ is loaded at mount time
+ collects pblocks_used on a phase flip
+ is updated in lafs_summary_update (unless pblocks is)
+ So we must be missing a lafs_summary_update when phys->0
+
+
+ Lots of problems:
+ truncating big (multi-level index) seems to be bad
+ Leaves 'pb-338 !!! and cb+689, even after sync.
+ still 'looping on' occasionally
+ Haven't found cblocks_used leak yet.
+    Occasionally non-B_Valid blocks are acted on.
+ I think I need to improve io locking.
+
+---------------
+1st March 2009
+ Need some improvements to iolock locking.
+ We use this lock to wait for a block to be written out (if that is happening)
+ before we allow lafs_invalidate_page to complete.n
+ It is also used in lafs_erase_{d,i}block (similar purpose)
+ We take the lock in lafs_cluster_allocate, and then make sure the block is
+ still dirty.
+
+ Also lock in lafs_new_inode as initing the inode is a form of IO ??
+ load_block takes the lock
+ We only clear_bit(B_Valid, ) under this lock.
+
+ So the issue is this:
+ A block that is going to be written is passed to lafs_cluster_allocate.
+ This happens either after taking it off a _leafs list, or when
+ lafs_writepage requests the write.
+
+ lafs_invalidate_page needs to be able to release the page, so there needs to
+ be no transient references. In particular, once the block has been
+ removed from a _leafs list it must already be iolocked.
+ Invalidate_page can then either remove from that list and erase the block,
+ or use io_lock_block to wait for the IO to complete.
+ So when a datablock comes out of get_flushable it must be iolocked, and must
+ remain iolocked until after Dirty and Alloc are clear
+ Index blocks belong entirely to the fs, so we can be more relaxed with them.
+ If get_flushable finds the block already iolocked, it is either being invalidated
+ or already has IO pending, so it can be dropped.
+
+
return youth;
}
-static inline struct block *get_flushable(struct fs *fs, int phase)
+/* Get a block that should be written out.
+ * If phase is '-1', then only return cleanable blocks, else
+ * also return blocks for that phase
+ */
+struct block *lafs_get_flushable(struct fs *fs, int phase)
{
struct block *b;
spin_lock(&fs->lock);
if (!list_empty(&fs->clean_leafs))
b = list_entry(fs->clean_leafs.next, struct block, lru);
- else if (!list_empty(&fs->phase_leafs[phase]))
+ else if (phase >= 0 && !list_empty(&fs->phase_leafs[phase]))
b = list_entry(fs->phase_leafs[phase].next, struct block, lru);
else
b = NULL;
dprintk("Start Checkpoint\n");
if (lafs_trace) lafs_dump_tree();
- while ( (b = get_flushable(fs, oldphase)) != NULL) {
+ while ( (b = lafs_get_flushable(fs, oldphase)) != NULL) {
dprintk("Block %d/%d idx=%d\n", (int)b->inode->i_ino, (int)b->fileaddr,
test_bit(B_Index, &b->flags));
*/
static void cleaner_flush(struct fs *fs)
{
+ struct block *b;
dprintk("Start cleaner_flush\n");
- spin_lock(&fs->lock);
- while (!list_empty(&fs->clean_leafs)) {
- char *strflags(struct block *b);
- struct block *b = list_entry(fs->clean_leafs.next,
- struct block, lru);
-
- list_del_init(&b->lru);
- /* We now own that reference */
- spin_unlock(&fs->lock);
- dprintk("cleaning %d/%d: %s\n", (int)b->inode->i_ino,
- (int)b->fileaddr, strflags(b));
+ while ( (b = lafs_get_flushable(fs, -1)) != NULL) {
+
+ dprintk("cleaning %s\n", strblk(b));
//printk("[");
if (test_bit(B_Dirty, &b->flags)) {
// FIXME wait??
lafs_cluster_wait_all(fs);
}
- spin_lock(&fs->lock);
//printk("\n");
}
- spin_unlock(&fs->lock);
}
+
static int try_clean(struct fs *fs, struct toclean *tc)
{
/* return 1 if everything has been found, -ve if we need to flush */