15bn/ How does refcounting of 'struct fs' work with multiple filesets?
-15bo/ use put_super to drop last refer to superblocks
+DONE 15bo/ use put_super to drop last refer to superblocks
-15bp/ review all superblocks - maybe use more anon??
+DONE 15bp/ review all superblocks - maybe use more anon??
15bq/ check readonly status in lafs_get_sb
- unusual conditions I want a warning of
- data corruption errors
-15cf/ lafs_iget_fs need to sometimes to in-kernel mounts for subset filesystems
+DONE 15cf/ lafs_iget_fs need to sometimes to in-kernel mounts for subset filesystems
This is needed for the cleaner - the cleaner needs to hold a ref somehow.
15cg/ lafs_sync_inode is weird - why the lafs_checkpoint_start and update_cluster
- how to destroy
36/ review roll-forward
- make sure files with nlink == 0 are handled well
- sanity check before trusting clusters
+
+36a/ make sure files with nlink == 0 are handled well
+DONE 36b/ sanity check before trusting clusters
+36c/ handle miniblocks which create new inodes.
+36d/ Handle DescHole in roll_block
+36e/ When dirtying a block in roll_block, maybe use writeback rather
+ than just iolock, for consistency...
+37f/ What to do if table becomes full when add_block_address in
+ roll_block ??
+37g/ Write roll_mini for directories.
+37h/ In roll_one, use the cluster counting code to find block number and
+ make sure we don't exceed the segment.
+37i/ add more general error checking to lafs_mount -
+ lafs_iget orphans and segsum. Check type is correct.
+ errors from lafs_count_orphans or lafs_add_orphans.
+ alloc_page failure for chead - maybe allocate something bigger??
+
37/ Configure index block hash_table at run time base on mem size??
/*
* fs/lafs/roll.c
- * Copyright (C) 2005-2009
+ * Copyright (C) 2005-2010
* Neil Brown <neilb@suse.de>
* Released under the GPL, version 2
*
* 'rollforward'
+ *
+ * This file handles mounting of a filesystem once the superblocks
+ * have been loaded.
+ * It loads the root inode (the root of the filesystem, not of the
+ * directory tree) and then handles roll-forward to pick up and changes
+ * there are not in the filesystem yet, either due to a crash, or because
+ * they cannot be consistently stored easily (final segusage/quota info).
+ *
+ * Roll-forward reads write-cluster header and handle things as appropriate.
+ * Data blocks are only processed if they belong to:
+ * - regular files
+ * - segusage files
+ * - quota files.
+ * A data block in a regular file implies an extension of the file size
+ * to the end of the block, if it was previously at or before the start
+ * of the block. Datablocks that were just moved for cleaning are
+ * ignored.
+ *
+ * Index blocks are always ignored - they need to be recalculated.
+ *
+ * 'miniblocks' or 'updates' are always processed - they represent an
+ * atomic update that might affect multiple files - those files for which
+ * data blocks are ignored.
+ * Updates are understood:
+ * - for inodes. The update simply over-writes part of the inode metadata,
+ * which could affect the link count or size. Such inodes become
+ * orphans in case truncation or deletion is needed. This can create
+ * an inode which might affect the inode usage map.
+ * - for directories. The update identifies a name and an inode number.
+ * This can imply a change to the inode's link count and again could
+ * make it an orphan. In some cases updates are paired, possibly across
+ * different directories. This is needed for 'rename'.
+ *
+ * Each write-cluster has three levels of validation.
+ * Firstly, if the header is internally consistent, with correct tag,
+ * uuid, and sequence, then we know a write was attempted, and anything that
+ * must be written before that was successfully written.
+ * Secondly, if the header has a correct checksum, then it is all correct,
+ * and the miniblocks are valid.
+ * Thirdly, if the next or next-but-one header (depending on verify_type) is
+ * internally consistent, than we know that the data blocks in this cluster
+ * were all written successfully.
*/
#include "lafs.h"
}
/*
+ * roll_locate scopes out the full extent of the required roll-forward.
+ * It start at the start of the last checkpoint (recorded in the stateblock)
+ * and checks that the end of the checkpoint exists, and continues following
+ * the chain as far as valid cluster heads can be found.
* roll_locate returns 0 if proper endpoints were found,
- * or -EINVAL?? if CheckpointStart and CheckpointEnd weren't found properly
+ * or -EIO if CheckpointStart and CheckpointEnd weren't found properly
* "next" will contain the address of the next cluster to be written to,
* "last" the cluster before that, and "seq" the seq number for next cluster
+ * "maxp" will be used to report the maximum size of a cluster head.
*/
static int
roll_locate(struct fs *fs, u64 start,
- u64 *next, u64 *lastp, u64 *seqp,
+ u64 *nextp, u64 *lastp, u64 *seqp,
int *maxp, struct page *p)
{
struct cluster_head *ch;
- u64 this, prev, prev2, last;
+ u64 this, prev, prev2, last, next;
u64 seq = 0;
int max = 0;
int prevtype, prev2type;
this = start; prev = start;
+ /* First we walk through the checkpoint section, which should
+ * all be valid.
+ */
do {
if (lafs_load_page(fs, p, this, 1) != 0) {
printk(KERN_ERR "LaFS: Could not read cluster %llu\n",
/* 'seq' is sequence number of 'this' */
dprintk("CheckpointEnd found at %llu, seq %llu\n", prev, seq-1);
- /* now we need to step forward a bit more carefully. as any
+ /* now we need to step forward a bit more carefully, as any
* cluster we find now could easily be bad.
* We keep:
* this - address of cluster we are now considering
*/
last = prev;
- start = this;
+ next = this;
prev2 = prev;
prevtype = prev2type = VerifyNull;
break;
if (le64_to_cpu(ch->seq) != seq)
break;
- /* FIXME check checksum, and possibly VerifySum */
+
/* this head looks valid, so we can possibly verify previous
* clusters
*/
max = le16_to_cpu(ch->Hlength);
if (prev2type == VerifyNext2) {
- start = prev; last = prev2;
+ next = prev;
+ last = prev2;
}
if (prevtype == VerifyNext) {
- start = this;
+ next = this;
last = prev;
}
/* shift prev info back */
- prev2 = prev; prev2type = prevtype;
- prev = this ; prevtype = le16_to_cpu(ch->verify_type);
+ prev2 = prev;
+ prev2type = prevtype;
+ prev = this;
+ prevtype = le16_to_cpu(ch->verify_type);
this = le64_to_cpu(ch->next_addr);
if (prevtype == VerifyNull) {
- start = this;
+ next = this;
last = prev;
}
seq++;
}
- dprintk("LaFS: Next address to write is %llu\n", start);
- *next = start;
+ dprintk("LaFS: Next address to write is %llu\n", next);
+ *nextp = next;
*lastp = last;
- if (start == this)
+ if (next == this)
*seqp = seq;
- else if (start == prev)
+ else if (next == prev)
*seqp = seq-1;
- else if (start == prev2)
+ else if (next == prev2)
*seqp = seq-2;
else
BUG();
li = LAFSI(inode);
switch (li->type) {
+ default: /* Any unknown type is an error */
+ printk(KERN_WARNING "LAFS impossibly file type for roll-forward: %d\n",
+ li->type);
+ err = -EIO;
+ break;
+
case TypeInodeFile:
- BUG_ON(fsnum); /* FIXME should be more careful */
+ if (fsnum) {
+ printk(KERN_WARNING "LAFS: Ignoring impossible sub-subset\n");
+ break;
+ }
+
lafs_iput_fs(inode);
inode = lafs_iget_fs(fs, inum, bnum, SYNC);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
- if (err == -EIO && offset == 0) {
- /* creating new inode */
- }
- return PTR_ERR(inode);
+ if (err != -ENOENT || offset != 0)
+ return err;
+
+ /* FIXME creating new inode */
+ BUG();
}
db = lafs_inode_dblock(inode, SYNC, MKREF(roll));
+ if (IS_ERR(db)) {
+ err = PTR_ERR(db);
+ break;
+ }
+ /* Make sure block is in-sync with inode */
+ lafs_inode_fillblock(inode);
buf = map_dblock(db);
- /* FIXME do I sync the inode back to the datablock first? */
memcpy(buf+offset, data, len);
unmap_dblock(db, buf);
err = lafs_import_inode(inode, db);
}
putdref(db, MKREF(roll));
break;
+
+ case TypeDir:
+ /* Haven't written this yet FIXME */
+ BUG();
+ break;
}
lafs_iput_fs(inode);
return err;
static int __must_check
roll_block(struct fs *fs, int fsnum, int inum, int trunc, int flg,
- u32 bnum, u64 baddr, int type, struct page *p)
+ u32 bnum, u64 baddr, int bytes, struct page *p)
{
struct inode *inode;
struct datablock *blk = NULL;
if (flg)
return 0; /* "old" blocks aren't interesting */
- if (type == DescIndex)
+ if (bytes == DescIndex)
return 0; /* index blocks aren't interesting either */
- if (type == DescHole)
+ if (bytes == DescHole)
return 0; /* FIXME should I punch a hole here? */
dprintk("Roll Block %d/%d/%d/%lu/%llu\n",
if (IS_ERR(inode))
return PTR_ERR(inode);
- /* FIXME do I need to 'lock' the inode in any way? */
-
/* check type */
li = LAFSI(inode);
/* already correctly indexed */
break;
- /* FIXME do I need to dirty the inode to flush
- * this change into the datablock?
- */
if (li->type >= TypeBase &&
- inode->i_size <= ((loff_t)bnum << inode->i_blkbits))
- inode->i_size = ((loff_t)bnum << inode->i_blkbits) + type;
+ inode->i_size <= ((loff_t)bnum << inode->i_blkbits)) {
+ inode->i_size = ((loff_t)bnum << inode->i_blkbits) + bytes;
+ set_bit(I_Dirty, &LAFSI(inode)->iflags);
+ }
/* FIXME: we pretend this is a dirty, pinned block
* so the lower-level code doesn't get confused.