};
#define WC_NUM 3 /* 3 active write-clusters: new, clean, and defrag */
+#define CLEANER_SEGS 4 /* Clean at most 4 segments at a time */
+
+#define ACCOUNT_RESERVED 3 /* Reserve 3 segments of space for accounting and
+ * cleaning
+ */
+#define RELEASE_RESERVED 1 /* Reserve 1 segment of space for overhead required
+ * to release space (e.g. delete file)
+ */
+#define TOTAL_RESERVED (ACCOUNT_RESERVED + RELEASE_RESERVED)
struct fs {
struct lafs_state *state;
u32 devices;
u32 statesize;
+ int blocksize, blocksize_bits;
int devs_loaded;
atomic_t sb_writes_pending;
u32 nonlog_segment;
unsigned short nonlog_dev;
- u16 nonlog_offset;
+ u32 nonlog_offset;
u32 maxsnapshot;
u64 checkpointcluster;
int rolled; /* set when rollforward has completed */
unsigned long fsstate;
#define CheckpointNeeded 0
-#define CleanerRunning 1
-#define CleanerNeeded 2
+#define ThreadRunning 1
+#define ThreadNeeded 2
#define FinalCheckpoint 3
#define CleanerDisabled 4
#define OrphansRunning 5
* be full and new allocation requests get
* -ENOSPC
*/
+#define FlushNeeded 8 /* Need to flush the current cluster because
+ * someone is waiting on writeback
+ */
+#define SecondFlushNeeded 9 /* Need a second cluster to commit the blocks
+ * in the previous one
+ */
+#define EmergencyPending 10 /* Cleaner isn't quite in emergency mode, but
+ * should be after the next checkpoint unless that
+ * releases lots of space
+ */
+#define CheckpointOpen 11 /* Some data has been written since the last checkpoint,
+ * so 'next_checkpoint' is a valid timestamp
+ */
+#define DelayYouth 12 /* While set, don't update any youth blocks. The update will
+ * happen either during seg_apply_all or in roll-forward
+ */
+
+ unsigned long next_checkpoint; /* Valid when CheckpointOpen is set, holds
+ * jiffies time by when we need to do a checkpoint
+ */
struct work_struct done_work; /* used for handling
* refile after write completes */
u16 dev;
u32 seg;
u64 haddr; /* Address of this write-cluster-header */
+ u64 seq; /* seq of most recent header loaded */
struct cluster_head *ch;
struct group_head *gh; /* current group head */
struct descriptor *desc;
3=loaded, 4 = ioerror */
struct fs *fs;
} ac;
- int ss; /* true if dev/seg are value */
+ int have_addr; /* true if dev/seg are valid */
struct list_head cleaning;
struct page *chead;
- } seg[4];
+ } seg[CLEANER_SEGS];
} cleaner;
- struct task_struct *cleaner_thread;
+ struct task_struct *thread;
- unsigned long newblocks; /* number of blocks written since checkpoint */
- unsigned long max_newblocks; /* max blocks in a checkpoint (roughly) */
+ unsigned long newsegments; /* number of segments written since checkpoint */
+ unsigned long max_newsegs; /* max segments in a checkpoint (roughly) */
/* counters for (pre)allocating space. */
spinlock_t alloc_lock;
- u64 free_blocks; /* initialised from free segment info */
+ s64 free_blocks; /* initialised from free segment info */
u64 allocated_blocks; /* Blocks that have been (pre)allocated */
u64 clean_reserved; /* Blocks reserved for cleaner segments */
u64 max_segment; /* largest segment size */
/* Youth management */
int youth_next; /* number to assign to next segment */
- unsigned short youth_dev; /* device number being decayed, or devs_loaded if none */
- int youth_block; /* block number of next block to decay */
- int checkpoint_youth; // FIXME make sure this gets decayed
+ int checkpoint_youth;
/* 10 heights: 0 to 9 */
#define SEG_NUM_HEIGHTS (10)
} unused, free, cleanable, clean;
unsigned short head[SEG_NUM_HEIGHTS]; /* head of skiplist */
int total;
- int max_score;
+ long long max_score;
int sorted_size;
} segtrack[1];
struct {
int free_dev, free_block, free_stage;
int first_free_pass; /* true the first time */
- int done; /* cleared on each checkpoint */
+ int done, do_decay; /* cleared on each checkpoint */
struct datablock *youth_db, *usage0_db;
- u16 *free_usages; /* This is an allocated page */
+ u32 *free_usages; /* This is an allocated page */
int trace;
} scan;
struct list_head flush_list; /* list of qents that need flushing */
struct list_head qhash[QHASHSIZE];
+ struct backing_dev_info bdi;
struct fs_dev {
- struct super_block *sb;
+ struct block_device *bdev;
struct lafs_dev *devblk;
u64 start, size;
u32 segment_stride;
u32 segment_count;
u32 usage_inum;
- u16 level;
- u32 rows_per_table, tables_per_seg;
+ u32 rows_per_table, tables_per_seg;
int recent_dev, recent_state;
- int tablesize; /* const */
+ int tablesize; /* in segusage file, not in segments */
struct inode *segsum;
} *devs;
int pending_next;
wait_queue_head_t pending_wait;
+ struct bio *bio; /* bio we are building */
+ u64 bio_virt; /* next sector we can add to bio */
+ int bio_head; /* True if current bio contains a header block */
+ int bio_which; /* pending[X] entry for this bio */
+ struct request_queue *bio_queue; /* queue to unplug */
+
struct segpos {
int dev;
u32 num;
struct hlist_head stable[SHASHSIZE];
spinlock_t stable_lock;
+ int stable_changed; /* Flag so we can see if the table changed while
+ * we dropped a lock */
+
+ struct rename_roll {
+ struct rename_roll *next;
+ u32 key;
+ struct inode *dir, *inode;
+ int nlen;
+ char name[1];
+ } *pending_renames;
+
};
static inline int test_phase_locked(struct fs *fs)
* in storage (i.e. in another snapshot)
*/
- struct block *chain; /* on list of unincorporated changes */
+ struct block *chain; /* on list of unincorporated changes, or list of blocks
+ * being read in */
#if DEBUG_REF
struct ref {int cnt; char *name; } holders[16];
u32 orphan_slot; /* slot in orphan file to record that this
* block is an orphan
*/
- struct list_head orphans; /* linked list of blocks needing orphan
- * processing.
- */
- struct list_head cleaning; /* list of blocks being cleaned.
- * Could share with orphans FIXME
+ union {
+ /* If a block is both an orphan and undergoing
+ * cleaning, it lives on the cleaning list until
+ * the cleaner has checked it. It is then moved
+ * to the pending_orphans list.
+ */
+ struct list_head orphans; /* linked list of blocks needing orphan
+ * processing.
*/
+ struct list_head cleaning; /* list of blocks being cleaned.
+ */
+ };
union {
struct inode *my_inode; /* only valid for block holding an inode */
};
enum {
/* NOTE: 32 flags in used. Need to overlap 'data' with 'index' if
- * we need more
+ * we need much more
*/
/* First flags that are meaningful for both Data and Index blocks
- * Currently 23 */
+ * Currently 22 */
B_Phase1 = 0, /* phase when pinned - must be '1' - used for indexing */
B_Dirty, /* block has changes which haven't been committed */
B_Index, /* This is an index block, not a data block */
B_Linked, /* block is known to be linked with all peers */
- B_WritePhase1, /* Block is pinned and has been written to this phase. */
B_Realloc, /* If set on a B_Dirty block, it was only dirtied for
* cleaning purposes and so should be written to the
* cleaner segment.
B_PhysValid, /* ->physaddr is correct */
/* Flags that are only relevant for Data Block
- * currently 6 */
+ * currently 7 */
B_PinPending, /* set on data blocks while checkpoint_locked if we might
* want to mark them dirty
B_HaveWriteback,/* We own the page writeback flag and when all blocks
* are unlocked we should clear it. */
B_Claimed, /* Used for exclusive allocation of inodes */
+ B_Cleaning, /* Used by cleaner to track which blocks it has a
+ * reference on already. */
+
/* Flags that are only relevant for Index Blocks
- * currently 2 */
+ * currently 3 */
B_OnFree, /* index block on the free list */
B_PrimaryRef, /* This indexblock has not been incorporated into the
* parent, and so can only be found by being a sibling
/* indexing info stays in the block, not in the inode */
struct lafs_inode {
struct inode vfs_inode;
+ struct inode *filesys; /* Inode of containing TypeInodeFile */
struct indexblock *iblock;
struct datablock *dblock;
- struct inode *filesys;
long cblocks, /* data blocks which are commited to this file */
pblocks, /* extra data blocks that are commited in next phase */
ablocks, /* data blocks that are allocated */
#define I_Destroyed 4 /* inode destroy has been delayed */
#define I_Trunc 5 /* a truncation is in process */
#define I_Pinned 6 /* InoIdx is pinned, i_nlink is non-zero, and consequently
- * we own an extra ref to the inode.
+ * we own an extra ref to the inode and superblock.
+ */
+#define I_AccessTime 7 /* Set if this inode holds a reference to the block
+ * in the accesstime file that holds our atime
*/
/* next three indicate if we hold a reference on the relevant qent */
#define I_QUid 8
loff_t trunc_next; /* next block to truncate from */
union {
struct fs_md {
- int usagetable;
- u64 update_time;
+ int usagetable; /* 0 and unused for subsets,
+ * 1 for main fs, >1 for snapshots
+ */
u64 cblocks_used; /* blocks commited */
u64 pblocks_used; /* extra blocks in next phase */
u64 ablocks_used; /* extra blocks allocated */
u64 blocks_allowed;
- u64 blocks_unalloc;
+ u64 blocks_unalloc; /* FIXME what is this for ?? */
+
u64 creation_age;
+ u32 parent; /* like 'parent' in directory */
u32 inodes_used;
u32 quota_inums[3];
struct inode *quota_inodes[3];
- char name[65];
+ struct inode *accesstime;
+ char *name;
} fs;
struct inodemap_md {
u32 size;
u32 table_size; /* (blocks) */
} segmentusage;
struct file_md {
+ u16 atime_offset; /* value stored in atime file */
u16 flags;
/* u16 mode; */
/* u32 uid; */
u32 gracetime; /* multiplier for graceunits */
u32 graceunits; /* seconds per unit */
} quota;
+
+ /* We free inodes using rcu, by which time any
+ * metadata is irrelevant and the type is zero
+ */
+ struct rcu_head rcu;
} md;
};