 * Copyright (C) 2006-2009
 * NeilBrown <neilb@suse.de>
 * Released under the GPL, version 2
 *
 * There are two quite separate sets of routines here.
 * One set is used for reading and writing filesystem blocks.
 * Reading is generally asynchronous, but can be waited for.
 * Writing is sequential into write-clusters.  It is not possible
 * to wait for a particular write, but only for a whole write-cluster
 * to be written.
 *
 * The other set is for all other IO, such as reading/writing superblocks
 * and stateblocks, and for reading cluster-heads during roll-forward.
 * These reads are always synchronous, while writes allow all devices
 * to be written in parallel.
 */

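/*
 * Typical synchronous read path (sketch only; see lafs_read_block below):
 *
 *      err = lafs_read_block(db);
 *        -> lafs_find_block(db, NOADOPT)       locate b->physaddr
 *        -> lafs_load_block(&db->b, NULL)      lock block, submit READ bio
 *        -> lafs_wait_block(&db->b)            sleep until B_Valid, or -EIO
 */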
#include <linux/blkdev.h>
#include <linux/bit_spinlock.h>

static int
lafs_dev_find(struct fs *fs, u64 virt)
{
        int i;

        for (i = 0; i < fs->devices; i++)
                if (virt >= fs->devs[i].start &&
                    virt < fs->devs[i].start + fs->devs[i].size)
                        return i;
        printk("%llu not found:\n", (unsigned long long)virt);
        for (i = 0; i < fs->devices; i++)
                printk(" %d: %llu+%llu\n", i,
                       (unsigned long long)fs->devs[i].start,
                       (unsigned long long)fs->devs[i].size);
        return -1;
}

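/*
 * Synchronous single-page IO: bi_complete signals the completion that
 * lafs_sync_page_io waits on after submitting its one-page bio.
 */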
static void bi_complete(struct bio *bio, int error)
{
        complete((struct completion *)bio->bi_private);
}

int
lafs_sync_page_io(struct block_device *bdev, sector_t sector,
                  int offset, int size,
                  struct page *page, int rw)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        struct completion event;
        int ret;

        rw |= (1 << BIO_RW_UNPLUG);
        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, offset);
        init_completion(&event);
        bio->bi_private = &event;
        bio->bi_end_io = bi_complete;
        submit_bio(rw, bio);
        wait_for_completion(&event);

        ret = !!test_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_put(bio);
        return ret;
}

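/*
 * lafs_load_page: synchronously read 'blocks' filesystem blocks at
 * virtual address 'vaddr' into the given page, after resolving vaddr
 * to a (device, sector) pair.
 */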
int
lafs_load_page(struct fs *fs, struct page *p, u64 vaddr, int blocks)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;

        virttophys(fs, vaddr, &dev, &sect);

        if (dev < 0 || dev >= fs->devs_loaded) {
                dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
                return -EIO;
        }
        bdev = fs->devs[dev].bdev;
        return lafs_sync_page_io(bdev, sect, 0,
                                 blocks << fs->blocksize_bits,
                                 p, READ) ? 0 : -EIO;
}

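/*
 * Asynchronous variant: bi_async_complete records the outcome in an
 * async_complete and wakes the filesystem thread instead of blocking
 * the caller.
 */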
static void
bi_async_complete(struct bio *bio, int error)
{
        struct async_complete *ac = bio->bi_private;

        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                ac->state = 3; /* loaded */
        else
                ac->state = 4; /* assumed error state, pairing with 2 == loading */
        bio_put(bio);
        lafs_wake_thread(ac->fs);
}

static void
async_page_io(struct block_device *bdev, sector_t sector, int offset, int size,
              struct page *page, int rw, struct async_complete *ac)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);

        rw |= (1 << BIO_RW_UNPLUG);
        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, offset);
        bio->bi_private = ac;
        bio->bi_end_io = bi_async_complete;
        submit_bio(rw, bio);
}

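/*
 * lafs_load_page_async: as lafs_load_page, but returns immediately;
 * progress is reported through the caller's async_complete.
 */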
int
lafs_load_page_async(struct fs *fs, struct page *p, u64 vaddr,
                     int blocks, struct async_complete *ac)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;

        virttophys(fs, vaddr, &dev, &sect);

        if (dev < 0 || dev >= fs->devs_loaded) {
                dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
                return -EIO;
        }
        bdev = fs->devs[dev].bdev;
        ac->state = 2; /* loading */
        ac->fs = fs;
        async_page_io(bdev, sect, 0,
                      blocks << fs->blocksize_bits,
                      p, READ, ac);
        return -EAGAIN; /* assumed contract: caller retries once woken */
}

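/*
 * Superblock writes: each submitted bio bumps sb_writes_pending;
 * bi_write_done drops the count and wakes anyone in lafs_super_wait.
 */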
static void
bi_write_done(struct bio *bio, int error)
{
        struct fs *fs = bio->bi_private;

        if (atomic_dec_and_test(&fs->sb_writes_pending))
                wake_up(&fs->sb_writes_wait);
        bio_put(bio);
        /* FIXME didn't do anything with error */
}

void
lafs_super_write(struct fs *fs, int dev, u64 addr, char *buf, int size)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        int rw = WRITE | (1 << BIO_RW_UNPLUG);

        bio->bi_bdev = fs->devs[dev].bdev;
        bio->bi_sector = addr;
        bio_add_page(bio, virt_to_page(buf), size, offset_in_page(buf));
        bio->bi_private = fs;
        bio->bi_end_io = bi_write_done;
        atomic_inc(&fs->sb_writes_pending);
        submit_bio(rw, bio);
}

int
lafs_super_wait(struct fs *fs)
{
        wait_event(fs->sb_writes_wait,
                   atomic_read(&fs->sb_writes_pending) == 0);
        return 0; /* FIXME should be an error flag */
}

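/*
 * Example (sketch; sb_addr and sb_buf are illustrative names only):
 *
 *      lafs_super_write(fs, dev, sb_addr, sb_buf, fs->blocksize);
 *      lafs_super_wait(fs);    all outstanding superblock writes done
 */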
static DECLARE_WAIT_QUEUE_HEAD(block_wait); /* need more of these later FIXME */

void lafs_io_wake(struct block *b)
{
        wake_up(&block_wait);
}

void _lafs_iolock_block(struct block *b)
{
        if (test_and_set_bit(B_IOLock, &b->flags)) {
                DEFINE_WAIT(wq);

                printk("iolock wait for %s:%d: %s\n",
                       b->iolock_file, b->iolock_line,
                       strblk(b)); /* assumed: strblk() formats the block */
                for (;;) {
                        prepare_to_wait(&block_wait, &wq, TASK_UNINTERRUPTIBLE);
                        if (!test_and_set_bit(B_IOLock, &b->flags))
                                break;
                        schedule();
                }
                finish_wait(&block_wait, &wq);
        }
}

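/*
 * Async flavour: returns 1 if the lock was obtained.  Otherwise set
 * B_Async and take an extra reference so the block survives until the
 * filesystem thread is woken to retry, and return 0.
 */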
int _lafs_iolock_block_async(struct block *b)
{
        if (!test_and_set_bit(B_IOLock, &b->flags)) {
                /* just got the lock! */
                if (test_and_clear_bit(B_Async, &b->flags))
                        putref(b, MKREF(async));
                return 1;
        }
        if (test_and_set_bit(B_Async, &b->flags))
                /* already have async set */
                return 0;
        getref(b, MKREF(async));
        return 0;
}

void
lafs_iounlock_block(struct block *b)
{
        /* Unlock this block, and if it is the last locked block
         * on the page, unlock the page too.
         * This only applies to data blocks.
         */
        if (test_bit(B_Index, &b->flags))
                clear_bit(B_IOLock, &b->flags);
        else
                lafs_iocheck_block(dblk(b), 1);

        lafs_io_wake(b);
        if (test_bit(B_Async, &b->flags))
                lafs_wake_thread(fs_from_inode(b->inode));
}

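/*
 * lafs_writeback_done mirrors lafs_iounlock_block for the B_Writeback
 * bit: index blocks are handled directly, data blocks via the per-page
 * accounting in lafs_iocheck_writeback.
 */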
void lafs_writeback_done(struct block *b)
{
        /* Remove the writeback flag on this block.
         * If it is the last on the page, release the page as well.
         */
        if (test_bit(B_Index, &b->flags)) {
                clear_bit(B_Writeback, &b->flags);
                lafs_io_wake(b);
                if (test_bit(B_Async, &b->flags))
                        lafs_wake_thread(fs_from_inode(b->inode));
        } else
                lafs_iocheck_writeback(dblk(b), 1);
}

void lafs_iocheck_block(struct datablock *db, int unlock)
{
        struct page *page = db->page;
        struct datablock *blist;
        int n, i;
        int locked = 0;
        int havelock = 0;

        if (!page)
                return;
        blist = (struct datablock *)page->private;
        if (!blist)
                return;

        n = 1<<(PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
        bit_spin_lock(B_IOLockLock, &blist->b.flags);
        if (unlock)
                clear_bit(B_IOLock, &db->b.flags);
        for (i = 0 ; i < n; i++) {
                if (test_bit(B_IOLock, &blist[i].b.flags))
                        locked = 1;
                /* FIXME what about checking uptodate ?? */
        }
        if (!locked && test_and_clear_bit(B_HaveLock, &blist->b.flags))
                havelock = 1;
        bit_spin_unlock(B_IOLockLock, &blist->b.flags);

        if (havelock) {
                if (!PageError(page))
                        SetPageUptodate(page);
                unlock_page(page);
        }
}

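/*
 * Same per-page accounting for B_Writeback: when the last block on the
 * page finishes writeback, end page writeback and wake any waiters.
 */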
void lafs_iocheck_writeback(struct datablock *db, int unlock)
{
        struct page *page = db->page;
        struct datablock *blist;
        int n, i;
        int locked = 0;
        int havewrite = 0;

        if (!page)
                return;
        blist = (struct datablock *)page->private;
        if (!blist)
                return;

        n = 1<<(PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
        bit_spin_lock(B_IOLockLock, &blist->b.flags);
        if (unlock)
                clear_bit(B_Writeback, &db->b.flags);
        for (i = 0 ; i < n; i++) {
                if (test_bit(B_Writeback, &blist[i].b.flags))
                        locked = 1;
                /* FIXME what about checking uptodate ?? */
        }
        if (!locked && test_and_clear_bit(B_HaveWriteback, &blist->b.flags))
                havewrite = 1;
        bit_spin_unlock(B_IOLockLock, &blist->b.flags);

        if (havewrite)
                end_page_writeback(page);
        lafs_io_wake(&db->b);
        if (test_bit(B_Async, &db->b.flags))
                lafs_wake_thread(fs_from_inode(db->b.inode));
}

int
lafs_wait_block(struct block *b)
{
        if (test_bit(B_IOLock, &b->flags) &&
            !test_bit(B_Valid, &b->flags)) {
                DEFINE_WAIT(wq);

                for (;;) {
                        prepare_to_wait(&block_wait, &wq, TASK_UNINTERRUPTIBLE);
                        if (test_bit(B_IOLock, &b->flags) &&
                            !test_bit(B_Valid, &b->flags))
                                schedule();
                        else
                                break;
                }
                finish_wait(&block_wait, &wq);
        }

        return test_bit(B_Valid, &b->flags) ? 0 : -EIO;
}

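/*
 * Async variant: if the block is no longer locked, drop any async
 * reference and report whether it is valid; otherwise arrange, via
 * B_Async plus a reference, for the filesystem thread to be woken so
 * the caller can retry.
 */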
int
lafs_wait_block_async(struct block *b)
{
        if (!test_bit(B_IOLock, &b->flags) ||
            test_bit(B_Valid, &b->flags)) {
                if (test_and_clear_bit(B_Async, &b->flags))
                        putref(b, MKREF(async));
                if (test_bit(B_Valid, &b->flags))
                        return 0;
                return -EIO;
        }
        if (test_and_set_bit(B_Async, &b->flags))
                return -EAGAIN;
        getref(b, MKREF(async));
        return -EAGAIN;
}

static void wait_writeback(struct block *b)
{
        if (test_bit(B_Writeback, &b->flags)) {
                DEFINE_WAIT(wq);

                printk("writeback wait for %s:%d: %s\n",
                       b->iolock_file, b->iolock_line,
                       strblk(b)); /* assumed: strblk() formats the block */
                lafs_trigger_flush(b);
                for (;;) {
                        prepare_to_wait(&block_wait, &wq, TASK_UNINTERRUPTIBLE);
                        if (test_bit(B_Writeback, &b->flags))
                                schedule();
                        else
                                break;
                }
                finish_wait(&block_wait, &wq);
        }
}

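/*
 * Lock a block whose previous write must also have completed: take the
 * IO lock, then wait for (or, in the async case, retry around) any
 * writeback still in flight.
 */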
void _lafs_iolock_written(struct block *b)
{
        _lafs_iolock_block(b);
        wait_writeback(b);
}

int _lafs_iolock_written_async(struct block *b)
{
        if (!test_bit(B_Writeback, &b->flags) &&
            !test_and_set_bit(B_IOLock, &b->flags)) {
                if (!test_bit(B_Writeback, &b->flags)) {
                        /* Have lock without writeback */
                        if (test_and_clear_bit(B_Async, &b->flags))
                                putref(b, MKREF(async));
                        return 1;
                }
                /* Writeback was set by a racing thread.. */
                lafs_iounlock_block(b);
        }
        lafs_trigger_flush(b);
        if (test_and_set_bit(B_Async, &b->flags))
                /* already have async set */
                return 0;
        getref(b, MKREF(async));
        return 0;
}

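/*
 * Read completion: block_loaded handles a single block; blocks_loaded
 * walks the chain of blocks that shared one bio, completing each in
 * turn.
 */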
static void
block_loaded(struct bio *bio, int error)
{
        struct block *b = bio->bi_private;

        dprintk("loaded %d of %d\n", (int)b->fileaddr, (int)b->inode->i_ino);
        if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
                set_bit(B_Valid, &b->flags); /* FIXME should I set ... */
        } else if (!test_bit(B_Index, &b->flags) && dblk(b)->page) {
                ClearPageUptodate(dblk(b)->page);
                SetPageError(dblk(b)->page);
        } else
                dprintk("Block with no page!!\n");
        lafs_iounlock_block(b);
}

static void
blocks_loaded(struct bio *bio, int error)
{
        struct block *bhead = bio->bi_private;

        while (bhead->chain) {
                struct block *b = bhead->chain;
                bhead->chain = b->chain;
                b->chain = NULL;
                bio->bi_private = b;
                block_loaded(bio, error);
        }
        bio->bi_private = bhead;
        block_loaded(bio, error);
}

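/*
 * lafs_load_block: bring one block into memory.  Blocks with
 * physaddr == 0 are synthesised (a cleared index block, or a
 * zero-filled data block); anything else is read from disk.  If 'bio'
 * is given, the block may be chained onto an existing multi-block read.
 */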
int
lafs_load_block(struct block *b, struct bio *bio)
{
        struct page *page;
        int offset;
        int dev;
        sector_t sect;
        struct block_device *bdev;
        struct fs *fs = fs_from_inode(b->inode);
        struct block *headb;

        if (!test_bit(B_PhysValid, &b->flags))
                b->physaddr = 0;
        if (test_bit(B_Valid, &b->flags))
                return 0;
        lafs_iolock_block(b);
        if (test_bit(B_Valid, &b->flags)) {
                lafs_iounlock_block(b);
                return 0;
        }
        LAFS_BUG(test_bit(B_InoIdx, &b->flags), b);
        if (test_bit(B_Index, &b->flags)) {
                struct indexblock *ib = iblk(b);

                if (b->physaddr == 0) {
                        /* An empty index block.  One doesn't see many
                         * of these as it means we trimmed out some
                         * blocks, but not all following blocks, and a
                         * block in the hole is being looked for.  Just
                         * create a valid, clear index block.
                         */
                        lafs_clear_index(ib);
                        lafs_iounlock_block(b);
                        return 0;
                }
                page = virt_to_page(ib->data);
                offset = offset_in_page(ib->data);
        } else {
                struct datablock *db = dblk(b);

                if (b->physaddr == 0) {
                        /* Block is either in the inode, or
                         * non-existent (all 'nul').
                         */
                        struct lafs_inode *lai = LAFSI(b->inode);
                        void *baddr = map_dblock(db);

                        /* This case is handled in find_block */
                        LAFS_BUG(lai->depth == 0 && b->fileaddr == 0, b);

                        memset(baddr, 0, 1 << b->inode->i_blkbits);
                        unmap_dblock(db, baddr);
                        set_bit(B_Valid, &b->flags);
                        lafs_iounlock_block(b);
                        return 0;
                }
                offset = dblock_offset(db);
                page = db->page;
        }

        virttophys(fs, b->physaddr, &dev, &sect);
        if (dev < 0) {
                lafs_iounlock_block(b);
                return -EIO;
        }
        bdev = fs->devs[dev].bdev;

        if (!bio) {
                bio = bio_alloc(GFP_NOIO, 1);
                bio->bi_bdev = bdev;
                bio->bi_sector = sect;
                bio_add_page(bio, page, fs->blocksize, offset);
                bio->bi_private = b;
                bio->bi_end_io = block_loaded;
                submit_bio(READ, bio);
                return 0;
        }

        LAFS_BUG(b->chain != NULL, b);
        if (bio->bi_size == 0) {
                bio->bi_bdev = bdev;
                bio->bi_sector = sect;
                bio_add_page(bio, page, fs->blocksize, offset);
                bio->bi_private = b;
                bio->bi_end_io = blocks_loaded;
                return 0;
        }
        if (bio->bi_sector + (bio->bi_size / 512) != sect
            || bio->bi_bdev != bdev
            || bio_add_page(bio, page, fs->blocksize, offset) == 0)
                return -EINVAL; /* assumed: caller submits this bio and retries */
        /* added the block successfully */
        headb = bio->bi_private;
        b->chain = headb->chain;
        headb->chain = b;
        return 0;
}

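/*
 * Convenience wrappers: find where the block lives, load it, and wait
 * (synchronously, or via the async retry protocol) for it to become
 * valid.
 */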
int
lafs_read_block(struct datablock *b)
{
        int rv;

        if (test_bit(B_Valid, &b->b.flags))
                return 0;

        rv = lafs_find_block(b, NOADOPT);
        if (rv)
                return rv;
        rv = lafs_load_block(&b->b, NULL);
        if (rv)
                return rv;
        return lafs_wait_block(&b->b);
}

int
lafs_read_block_async(struct datablock *b)
{
        int rv;

        if (test_bit(B_Valid, &b->b.flags))
                return 0;

        rv = lafs_find_block_async(b);
        if (rv)
                return rv;
        rv = lafs_load_block(&b->b, NULL);
        if (rv)
                return rv;
        return lafs_wait_block_async(&b->b);
}

/*------------------------------------------------------------------
 * Writing filesystem blocks and cluster headers.
 * The endio function is found from lafs_cluster_endio_choose.
 * We need to increment the pending_cnt for this cluster and,
 * if this is a header block, possibly for earlier clusters.
 *
 * Later we should attempt to combine multiple blocks into the
 * one bio ... if we can manage the bi_end_io function properly.
 */

static void write_block(struct fs *fs, struct page *p, int offset,
                        u64 virt, struct wc *wc, int head)
{
        struct bio *bio = wc->bio;
        sector_t uninitialized_var(sect);
        int which = wc->pending_next;
        int dev;
        int nr_vecs;

        virttophys(fs, virt, &dev, &sect);

        if (bio && virt == wc->bio_virt &&
            bio->bi_bdev == fs->devs[dev].bdev &&
            which == wc->bio_which &&
            bio_add_page(bio, p, fs->blocksize, offset) > 0) {
                /* Added to the current bio - too easy */
                wc->bio_virt++;
                return;
        }

        if (bio) {
                int w = wc->bio_which;
                /* need to submit the pending bio and add to pending counts */
                atomic_inc(&wc->pending_cnt[w]);
                if (wc->pending_vfy_type[w] == VerifyNext ||
                    wc->pending_vfy_type[w] == VerifyNext2)
                        atomic_inc(&wc->pending_cnt[w]);
                if (wc->pending_vfy_type[w] == VerifyNext2)
                        atomic_inc(&wc->pending_cnt[w]);

                if (wc->bio_queue && wc->bio_queue != bdev_get_queue(bio->bi_bdev))
                        blk_unplug(wc->bio_queue);
                wc->bio_queue = bdev_get_queue(bio->bi_bdev);
                submit_bio(WRITE, bio);
                wc->bio = NULL;
                bio = NULL;
        }
        if (!virt && !head) {
                /* just a flush request - no new block to start */
                if (wc->bio_queue)
                        blk_unplug(wc->bio_queue);
                wc->bio_queue = NULL;
                return;
        }

        nr_vecs = 128; /* FIXME */
        while (!bio && nr_vecs) {
                bio = bio_alloc(GFP_NOIO, nr_vecs);
                nr_vecs /= 2;
        }
        wc->bio_virt = virt + 1;
        wc->bio = bio;
        wc->bio_which = which;
        bio->bi_bdev = fs->devs[dev].bdev;
        bio->bi_sector = sect;
        bio_add_page(bio, p, fs->blocksize, offset);
        bio->bi_private = wc;
        bio->bi_end_io = lafs_cluster_endio_choose(which, head);
}

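/*
 * The exported writers below feed write_block: lafs_write_head sends a
 * cluster header, lafs_write_block sends an index or data block, and
 * lafs_write_flush pushes out any partially-filled bio.
 */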
void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt,
                     struct wc *wc)
{
        write_block(fs, virt_to_page(head), offset_in_page(head),
                    virt, wc, 1);
}

void lafs_write_block(struct fs *fs, struct block *b, struct wc *wc)
{
        if (test_bit(B_Index, &b->flags))
                write_block(fs, virt_to_page(iblk(b)->data),
                            offset_in_page(iblk(b)->data),
                            b->physaddr, wc, 0);
        else
                write_block(fs, dblk(b)->page, dblock_offset(dblk(b)),
                            b->physaddr, wc, 0);
}

void lafs_write_flush(struct fs *fs, struct wc *wc)
{
        write_block(fs, NULL, 0, 0, wc, 0);
}

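/*
 * Typical cluster write (sketch; head, the block list and wc are the
 * caller's):
 *
 *      lafs_write_head(fs, head, head_addr, wc);
 *      for each block b in the cluster:
 *              lafs_write_block(fs, b, wc);
 *      lafs_write_flush(fs, wc);       flush the final partial bio
 */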