/*
 * IO routines for LaFS
 * fs/lafs/io.c
 * Copyright (C) 2006-2009
 * NeilBrown <neilb@suse.de>
 * Released under the GPL, version 2
 */

/*
 * There are two quite separate sets of routines here.
 * One set is used for reading and writing filesystem blocks.
 * Reading is generally asynchronous, but can be waited for.
 * Writing is sequential into write-clusters.  It is not possible
 * to wait for a particular write, only for a write-cluster
 * to be safe.
 * The other set is for all other IO, such as reading/writing superblocks
 * and stateblocks, and for reading cluster-heads during roll-forward.
 * These reads are always synchronous, while writes allow all devices
 * to be written in parallel.
 */
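/*
 * Illustrative sketch (added commentary, not part of the original
 * file): a typical user of the first set of routines reads a single
 * data block through the helpers defined near the end of this file,
 * either synchronously,
 *
 *	err = lafs_read_block(db);
 *
 * or asynchronously from the filesystem thread, where -EAGAIN means
 * the IO is still in flight and B_Async will wake the thread on
 * completion:
 *
 *	err = lafs_read_block_async(db);
 */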

#include	"lafs.h"
#include	<linux/blkdev.h>
#include	<linux/bit_spinlock.h>

int
lafs_dev_find(struct fs *fs, u64 virt)
{
        int i;
        for (i = 0; i < fs->devices; i++)
                if (virt >= fs->devs[i].start &&
                    virt < fs->devs[i].start + fs->devs[i].size)
                        return i;
        printk("%llu not found:\n", (unsigned long long) virt);
        for (i = 0; i < fs->devices; i++)
                printk(" %d: %llu+%llu\n", i,
                       (unsigned long long)fs->devs[i].start,
                       (unsigned long long)fs->devs[i].size);
        BUG();
        return -1;
}

static void bi_complete(struct bio *bio, int error)
{
        complete((struct completion *)bio->bi_private);
}

int
lafs_sync_page_io(struct block_device *bdev, sector_t sector,
                  int offset, int size,
                  struct page *page, int rw)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        struct completion event;
        int ret;

        rw |= (1 << BIO_RW_UNPLUG);

        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, offset);
        init_completion(&event);
        bio->bi_private = &event;
        bio->bi_end_io = bi_complete;
        submit_bio(rw, bio);
        wait_for_completion(&event);

        ret = !!test_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_put(bio);
        return ret;
}

int
lafs_load_page(struct fs *fs, struct page *p, u64 vaddr, int blocks)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;

        virttophys(fs, vaddr, &dev, &sect);

        if (dev < 0 || dev >= fs->devs_loaded) {
                dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
                return -EIO;
        }

        bdev = fs->devs[dev].bdev;
        return lafs_sync_page_io(bdev, sect, 0,
                                 blocks << fs->blocksize_bits,
                                 p, 0) ? 0 : -EIO;
}

static void
bi_async_complete(struct bio *bio, int error)
{
        struct async_complete *ac = bio->bi_private;

        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                ac->state = 3;
        else
                ac->state = 4;
        bio_put(bio);
        lafs_wake_thread(ac->fs);
}
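/*
 * Added note (inferred from the code above and in lafs_load_page_async
 * below): ac->state is used as a small state machine -
 *   2 - IO has been submitted and is still in flight,
 *   3 - IO completed successfully,
 *   4 - IO failed.
 */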

static void
async_page_io(struct block_device *bdev, sector_t sector, int offset, int size,
              struct page *page, int rw, struct async_complete *ac)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);

        rw |= (1 << BIO_RW_UNPLUG);

        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, offset);
        bio->bi_private = ac;
        bio->bi_end_io = bi_async_complete;
        submit_bio(rw, bio);
}

int
lafs_load_page_async(struct fs *fs, struct page *p, u64 vaddr,
                     int blocks, struct async_complete *ac)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;

        virttophys(fs, vaddr, &dev, &sect);

        if (dev < 0 || dev >= fs->devs_loaded) {
                dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
                return -EIO;
        }
        if (ac->state == 2)
                return -EAGAIN;
        if (ac->state == 3)
                return 0;
        if (ac->state == 4)
                return -EIO;

        bdev = fs->devs[dev].bdev;
        ac->state = 2; /* loading */
        ac->fs = fs;
        async_page_io(bdev, sect, 0,
                      blocks << fs->blocksize_bits,
                      p, 0, ac);
        return -EAGAIN;
}

static void
bi_write_done(struct bio *bio, int error)
{
        struct fs *fs = bio->bi_private;

        if (atomic_dec_and_test(&fs->sb_writes_pending))
                wake_up(&fs->sb_writes_wait);
        bio_put(bio);
        /* FIXME didn't do anything with error */
}

void
lafs_super_write(struct fs *fs, int dev, u64 addr, char *buf, int size)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        int rw = WRITE | (1 << BIO_RW_UNPLUG);

        bio->bi_bdev = fs->devs[dev].bdev;
        bio->bi_sector = addr;
        bio_add_page(bio, virt_to_page(buf), size, offset_in_page(buf));
        bio->bi_private = fs;
        bio->bi_end_io = bi_write_done;
        atomic_inc(&fs->sb_writes_pending);
        submit_bio(rw, bio);
}

int
lafs_super_wait(struct fs *fs)
{
        wait_event(fs->sb_writes_wait,
                   atomic_read(&fs->sb_writes_pending) == 0
                );
        return 0; /* FIXME should be an error flag */
}
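
#if 0
/*
 * Illustrative sketch only, not part of the original file: the intent
 * (per the comment at the top of this file) is that super writes to
 * all devices proceed in parallel and are then waited for together.
 * Passing the same 'addr', 'buf' and 'size' for every device is a
 * simplification; the real caller keeps per-device state.
 */
static void example_write_supers(struct fs *fs, u64 addr, char *buf, int size)
{
        int i;

        for (i = 0; i < fs->devices; i++)
                lafs_super_write(fs, i, addr, buf, size);	/* one write per device */
        lafs_super_wait(fs);	/* wait for sb_writes_pending to reach zero */
}
#endif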

static int sched(void *flags)
{
        io_schedule();
        return 0;
}

void _lafs_iolock_block(struct block *b)
{
        while (test_and_set_bit(B_IOLock, &b->flags)) {
#ifdef DEBUG_IOLOCK
                printk("iolock wait for %s:%d: %s\n",
                       b->iolock_file, b->iolock_line,
                       strblk(b));
#endif
                wait_on_bit(&b->flags, B_IOLock,
                            sched, TASK_UNINTERRUPTIBLE);
        }
}

int _lafs_iolock_block_async(struct block *b)
{
        for (;;) {
                if (!test_and_set_bit(B_IOLock, &b->flags)) {
                        /* just got the lock! */
                        if (test_and_clear_bit(B_Async, &b->flags))
                                putref(b, MKREF(async));
                        return 1;
                }
                if (test_and_set_bit(B_Async, &b->flags))
                        /* already have async set */
                        return 0;
                getref(b, MKREF(async));
        }
}

void
lafs_iounlock_block(struct block *b)
{
        /* Unlock this block, and if it is the last locked block
         * for the page, unlock the page too.
         * This only applies to data blocks.
         */

        if (test_bit(B_Index, &b->flags))
                clear_bit(B_IOLock, &b->flags);
        else
                lafs_iocheck_block(dblk(b), 1);

        wake_up_bit(&b->flags, B_IOLock);
        if (test_bit(B_Async, &b->flags))
                lafs_wake_thread(fs_from_inode(b->inode));
}

void lafs_writeback_done(struct block *b)
{
        /* Remove the writeback flag on this block.
         * If it is the last on the page, release the page as well.
         */

        if (test_bit(B_Index, &b->flags)) {
                clear_bit(B_Writeback, &b->flags);
                wake_up_bit(&b->flags, B_Writeback);
                if (test_bit(B_Async, &b->flags))
                        lafs_wake_thread(fs_from_inode(b->inode));
        } else
                lafs_iocheck_writeback(dblk(b), 1);
}

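/*
 * Added note (summarising the code below): lafs_iocheck_block() scans
 * every block on the page; once none of them holds B_IOLock, and
 * B_HaveLock shows the page lock is held on behalf of its blocks, the
 * page is marked uptodate (unless an error was recorded) and unlocked.
 * lafs_iocheck_writeback() does the same for B_Writeback and
 * B_HaveWriteback, ending page writeback instead.
 */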
void lafs_iocheck_block(struct datablock *db, int unlock)
{
        struct page *page = db->page;
        struct datablock *blist;
        int n, i;
        int locked = 0;
        int havelock = 0;

        if (!page)
                return;
        blist = (struct datablock *)page->private;
        if (!blist)
                return;

        n = 1 << (PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
        bit_spin_lock(B_IOLockLock, &blist->b.flags);
        if (unlock)
                clear_bit(B_IOLock, &db->b.flags);
        for (i = 0; i < n; i++) {
                if (test_bit(B_IOLock, &blist[i].b.flags))
                        locked++;
                /* FIXME what about checking uptodate ?? */
        }
        if (!locked && test_and_clear_bit(B_HaveLock, &blist->b.flags))
                havelock = 1;
        bit_spin_unlock(B_IOLockLock, &blist->b.flags);

        if (havelock) {
                if (!PageError(page))
                        SetPageUptodate(page);
                unlock_page(page);
        }
}

void lafs_iocheck_writeback(struct datablock *db, int unlock)
{
        struct page *page = db->page;
        struct datablock *blist;
        int n, i;
        int locked = 0;
        int havewrite = 0;

        if (!page)
                return;
        blist = (struct datablock *)page->private;
        if (!blist)
                return;

        n = 1 << (PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
        bit_spin_lock(B_IOLockLock, &blist->b.flags);
        if (unlock)
                clear_bit(B_Writeback, &db->b.flags);
        for (i = 0; i < n; i++) {
                if (test_bit(B_Writeback, &blist[i].b.flags))
                        locked++;
                /* FIXME what about checking uptodate ?? */
        }
        if (!locked && test_and_clear_bit(B_HaveWriteback, &blist->b.flags))
                havewrite = 1;
        bit_spin_unlock(B_IOLockLock, &blist->b.flags);

        if (havewrite)
                end_page_writeback(page);
        if (unlock) {
                wake_up_bit(&db->b.flags, B_Writeback);
                if (test_bit(B_Async, &db->b.flags))
                        lafs_wake_thread(fs_from_inode(db->b.inode));
        }
}

static int sched_valid(void *flags)
{
        if (test_bit(B_Valid, flags))
                return -EINTR;

        schedule();
        return 0;
}

int __must_check
lafs_wait_block(struct block *b)
{
        if (test_bit(B_IOLock, &b->flags) &&
            !test_bit(B_Valid, &b->flags))
                wait_on_bit(&b->flags, B_IOLock,
                            sched_valid, TASK_UNINTERRUPTIBLE);

        return test_bit(B_Valid, &b->flags) ? 0 : -EIO;
}

int __must_check
lafs_wait_block_async(struct block *b)
{
        for (;;) {
                if (!test_bit(B_IOLock, &b->flags) ||
                    test_bit(B_Valid, &b->flags)) {
                        if (test_and_clear_bit(B_Async, &b->flags))
                                putref(b, MKREF(async));
                        if (test_bit(B_Valid, &b->flags))
                                return 0;
                        else
                                return -EIO;
                }
                if (test_and_set_bit(B_Async, &b->flags))
                        return -EAGAIN;
                getref(b, MKREF(async));
        }
}

static void wait_writeback(struct block *b)
{
        if (test_bit(B_Writeback, &b->flags)) {
#ifdef DEBUG_IOLOCK
                printk("writeback wait for %s:%d: %s\n",
                       b->iolock_file, b->iolock_line,
                       strblk(b));
#endif
                lafs_trigger_flush(b);
                wait_on_bit(&b->flags, B_Writeback,
                            sched, TASK_UNINTERRUPTIBLE);
        }
}

void _lafs_iolock_written(struct block *b)
{
        _lafs_iolock_block(b);
        wait_writeback(b);
}

int _lafs_iolock_written_async(struct block *b)
{
        for (;;) {
                if (!test_bit(B_Writeback, &b->flags) &&
                    !test_and_set_bit(B_IOLock, &b->flags)) {
                        if (!test_bit(B_Writeback, &b->flags)) {
                                /* Have lock without writeback */
                                if (test_and_clear_bit(B_Async, &b->flags))
                                        putref(b, MKREF(async));
                                return 1;
                        }
                        /* Writeback was set by a racing thread.. */
                        lafs_iounlock_block(b);
                }
                lafs_trigger_flush(b);
                if (test_and_set_bit(B_Async, &b->flags))
                        return 0;

                getref(b, MKREF(async));
        }
}

static void
block_loaded(struct bio *bio, int error)
{
        struct block *b = bio->bi_private;

        dprintk("loaded %d of %d\n", (int)b->fileaddr, (int)b->inode->i_ino);
        if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
                set_bit(B_Valid, &b->flags); /* FIXME should I set
                                                an error too? */
        } else if (!test_bit(B_Index, &b->flags) && dblk(b)->page) {
                ClearPageUptodate(dblk(b)->page);
                SetPageError(dblk(b)->page);
        } else
                dprintk("Block with no page!!\n");
        lafs_iounlock_block(b);
}

static void
blocks_loaded(struct bio *bio, int error)
{
        struct block *bhead = bio->bi_private;

        while (bhead->chain) {
                struct block *b = bhead->chain;
                bhead->chain = b->chain;
                b->chain = NULL;
                bio->bi_private = b;
                block_loaded(bio, error);
        }
        bio->bi_private = bhead;
        block_loaded(bio, error);
}
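
/*
 * Added note (summarising the code below): lafs_load_block() either
 * submits a single-block bio of its own (bio == NULL), starts filling
 * a caller-supplied bio (bi_size == 0), or appends to a partially
 * filled one when the new block is physically contiguous.  Blocks
 * sharing a bio are linked through b->chain so that blocks_loaded()
 * above can complete each of them individually.
 */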

int __must_check
lafs_load_block(struct block *b, struct bio *bio)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;
        struct fs *fs = fs_from_inode(b->inode);
        struct page *page;
        struct block *headb;
        int offset;

        if (!test_bit(B_PhysValid, &b->flags))
                b->physaddr = 0;
        if (test_bit(B_Valid, &b->flags))
                return 0;
        lafs_iolock_block(b);
        if (test_bit(B_Valid, &b->flags)) {
                lafs_iounlock_block(b);
                return 0;
        }
        LAFS_BUG(test_bit(B_InoIdx, &b->flags), b);
        if (test_bit(B_Index, &b->flags)) {
                struct indexblock *ib = iblk(b);

                if (b->physaddr == 0) {
                        /* An empty index block.  One doesn't see many
                         * of these as it means we trimmed out some
                         * blocks, but not all the following blocks,
                         * and a block in the hole is being looked for.
                         * Just create a valid, clear index block.
                         */
                        lafs_clear_index(ib);
                        lafs_iounlock_block(b);
                        return 0;
                }

                page = virt_to_page(ib->data);
                offset = offset_in_page(ib->data);
        } else {
                struct datablock *db = dblk(b);
                if (b->physaddr == 0) {
                        /* block is either in the inode, or
                         * non-existent (all 'nul').
                         */
                        struct lafs_inode *lai = LAFSI(b->inode);
                        void *baddr = map_dblock(db);

                        /* This case is handled in find_block */
                        LAFS_BUG(lai->depth == 0 && b->fileaddr == 0, b);

                        memset(baddr, 0, (1 << b->inode->i_blkbits));
                        unmap_dblock(db, baddr);
                        set_bit(B_Valid, &b->flags);
                        lafs_iounlock_block(b);
                        return 0;
                }
                page = db->page;
                offset = dblock_offset(db);
        }

        virttophys(fs, b->physaddr, &dev, &sect);

        if (dev < 0) {
                lafs_iounlock_block(b);
                return -EIO;
        }

        bdev = fs->devs[dev].bdev;

        if (!bio) {
                bio = bio_alloc(GFP_NOIO, 1);

                bio->bi_bdev = bdev;
                bio->bi_sector = sect;
                bio_add_page(bio, page, fs->blocksize, offset);

                bio->bi_private = b;
                bio->bi_end_io = block_loaded;
                submit_bio(READ, bio);

                return 0;
        }
        LAFS_BUG(b->chain != NULL, b);
        if (bio->bi_size == 0) {
                bio->bi_sector = sect;
                bio->bi_bdev = bdev;
                bio_add_page(bio, page, fs->blocksize, offset);
                bio->bi_private = b;
                bio->bi_end_io = blocks_loaded;
                return 0;
        }
        if (bio->bi_sector + (bio->bi_size / 512) != sect
            || bio->bi_bdev != bdev
            || bio_add_page(bio, page, fs->blocksize, offset) == 0)
                return -EINVAL;
        /* added the block successfully */
        headb = bio->bi_private;
        b->chain = headb->chain;
        headb->chain = b;
        return 0;
}

int __must_check
lafs_read_block(struct datablock *b)
{
        int rv;

        if (test_bit(B_Valid, &b->b.flags))
                return 0;

        rv = lafs_find_block(b, NOADOPT);
        if (rv)
                return rv;
        rv = lafs_load_block(&b->b, NULL);
        if (rv)
                return rv;
        return lafs_wait_block(&b->b);
}

int __must_check
lafs_read_block_async(struct datablock *b)
{
        int rv;

        if (test_bit(B_Valid, &b->b.flags))
                return 0;

        rv = lafs_find_block_async(b);
        if (rv)
                return rv;
        rv = lafs_load_block(&b->b, NULL);
        if (rv)
                return rv;
        return lafs_wait_block_async(&b->b);
}

/*------------------------------------------------------------------
 * Writing filesystem blocks and cluster headers.
 * The endio function is found from lafs_cluster_endio_choose.
 * We need to increment the pending_cnt for this cluster and,
 * if this is a header block, possibly for earlier clusters.
 *
 * Later we should attempt to combine multiple blocks into the
 * one bio ... if we can manage the b_endio function properly.
 */
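/*
 * Illustrative calling pattern only (added commentary, not from the
 * original file): the cluster-flushing code hands the cluster head and
 * each of its blocks to the helpers below and ends the cluster with a
 * flush, roughly
 *
 *	lafs_write_head(fs, head, head_virt, wc);
 *	lafs_write_block(fs, b, wc);          (once per block)
 *	lafs_write_flush(fs, wc);             (submit and unplug the last bio)
 */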

static void write_block(struct fs *fs, struct page *p, int offset,
                        u64 virt, struct wc *wc, int head)
{
        struct bio *bio;
        sector_t uninitialized_var(sect);
        int which = wc->pending_next;
        int dev;
        int nr_vecs;

        virttophys(fs, virt, &dev, &sect);

        bio = wc->bio;
        if (bio && virt == wc->bio_virt &&
            bio->bi_bdev == fs->devs[dev].bdev &&
            which == wc->bio_which &&
            bio_add_page(bio, p, fs->blocksize, offset) > 0) {
                /* Added to the current bio - too easy */
                wc->bio_virt++;
                return;
        }

        if (bio) {
                int w = wc->bio_which;
                /* need to submit the pending bio and add to pending counts */
                atomic_inc(&wc->pending_cnt[w]);
                if (wc->bio_head) {
                        w = (w + 3) % 4;
                        if (wc->pending_vfy_type[w] == VerifyNext ||
                            wc->pending_vfy_type[w] == VerifyNext2)
                                atomic_inc(&wc->pending_cnt[w]);
                        w = (w + 3) % 4;
                        if (wc->pending_vfy_type[w] == VerifyNext2)
                                atomic_inc(&wc->pending_cnt[w]);
                }
                wc->bio = NULL;
                if (wc->bio_queue && wc->bio_queue != bdev_get_queue(bio->bi_bdev))
                        blk_unplug(wc->bio_queue);
                wc->bio_queue = bdev_get_queue(bio->bi_bdev);
                submit_bio(WRITE, bio);
                bio = NULL;
        }
        if (!virt && !head) {
                /* end of cluster */
                if (wc->bio_queue)
                        blk_unplug(wc->bio_queue);
                wc->bio_queue = NULL;
                return;
        }
        nr_vecs = 128; /* FIXME */
        while (!bio && nr_vecs) {
                bio = bio_alloc(GFP_NOIO, nr_vecs);
                nr_vecs /= 2;
        }
        wc->bio = bio;
        wc->bio_virt = virt + 1;
        wc->bio_head = head;
        wc->bio_which = which;
        bio->bi_bdev = fs->devs[dev].bdev;
        bio->bi_sector = sect;
        bio_add_page(bio, p, fs->blocksize, offset);

        bio->bi_private = wc;
        bio->bi_end_io = lafs_cluster_endio_choose(which, head);
}

void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt,
                     struct wc *wc)
{
        write_block(fs, virt_to_page(head), offset_in_page(head),
                    virt, wc, 1);
}

void lafs_write_block(struct fs *fs, struct block *b, struct wc *wc)
{
        if (test_bit(B_Index, &b->flags))
                write_block(fs, virt_to_page(iblk(b)->data),
                            offset_in_page(iblk(b)->data),
                            b->physaddr, wc, 0);
        else
                write_block(fs, dblk(b)->page, dblock_offset(dblk(b)),
                            b->physaddr, wc, 0);
}

void lafs_write_flush(struct fs *fs, struct wc *wc)
{
        write_block(fs, NULL, 0, 0, wc, 0);
}