/*
 * IO routines for LaFS
 * fs/lafs/io.c
 * Copyright (C) 2006-2009
 * NeilBrown <neilb@suse.de>
 * Released under the GPL, version 2
 */

/*
 * There are two quite separate sets of routines here.
 * One set is used for reading and writing filesystem blocks.
 * Reading is generally asynchronous, but can be waited for.
 * Writing is sequential into write-clusters.  It is not possible
 * to wait for a particular write, but only to wait for a write-cluster
 * to be safe.
 * The other set is for all other IO, such as reading/writing superblocks
 * and stateblocks, and for reading cluster-heads during roll-forward.
 * These reads are always synchronous, while writes allow all devices
 * to be written in parallel.
 */
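
/*
 * Summary of the read path below: lafs_read_block() resolves a block's
 * physical address with lafs_find_block(), lafs_load_block() submits a
 * bio for it (or merges it into a caller-supplied bio), and
 * lafs_wait_block() sleeps until the completion handler drops B_IOLock,
 * with B_Valid reporting the result.
 */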

#include	"lafs.h"
#include	<linux/blkdev.h>
#include	<linux/bit_spinlock.h>

int
lafs_dev_find(struct fs *fs, u64 virt)
{
	int i;
	for (i = 0; i < fs->devices; i++)
		if (virt >= fs->devs[i].start &&
		    virt < fs->devs[i].start + fs->devs[i].size)
			return i;
	printk("%llu not found:\n", (unsigned long long) virt);
	for (i = 0; i < fs->devices; i++)
		printk(" %d: %llu+%llu\n", i,
		       (unsigned long long)fs->devs[i].start,
		       (unsigned long long)fs->devs[i].size);
	BUG();
	return -1;
}

static void bi_complete(struct bio *bio, int error)
{
	complete((struct completion *)bio->bi_private);
}

int
lafs_sync_page_io(struct block_device *bdev, sector_t sector,
		  int offset, int size,
		  struct page *page, int rw)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	struct completion event;
	int ret;

	rw |= (1 << BIO_RW_UNPLUG);

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, size, offset);
	init_completion(&event);
	bio->bi_private = &event;
	bio->bi_end_io = bi_complete;
	submit_bio(rw, bio);
	wait_for_completion(&event);

	ret = !!test_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_put(bio);
	return ret;
}

int
lafs_load_page(struct fs *fs, struct page *p, u64 vaddr, int blocks)
{
	int dev;
	sector_t sect;
	struct block_device *bdev;

	virttophys(fs, vaddr, &dev, &sect);

	if (dev < 0 || dev >= fs->devs_loaded) {
		dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
		return -EIO;
	}

	bdev = fs->devs[dev].bdev;
	return lafs_sync_page_io(bdev, sect, 0,
				 blocks << fs->blocksize_bits,
				 p, 0) ? 0 : -EIO;
}

static void
bi_async_complete(struct bio *bio, int error)
{
	struct async_complete *ac = bio->bi_private;

	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
		ac->state = 3;
	else
		ac->state = 4;
	bio_put(bio);
	lafs_wake_thread(ac->fs);
}

static void
async_page_io(struct block_device *bdev, sector_t sector, int offset, int size,
	      struct page *page, int rw, struct async_complete *ac)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	rw |= (1 << BIO_RW_UNPLUG);

	bio->bi_bdev = bdev;
	bio->bi_sector = sector;
	bio_add_page(bio, page, size, offset);
	bio->bi_private = ac;
	bio->bi_end_io = bi_async_complete;
	submit_bio(rw, bio);
}

int
lafs_load_page_async(struct fs *fs, struct page *p, u64 vaddr,
		     int blocks, struct async_complete *ac)
{
	int dev;
	sector_t sect;
	struct block_device *bdev;

	virttophys(fs, vaddr, &dev, &sect);

	if (dev < 0 || dev >= fs->devs_loaded) {
		dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
		return -EIO;
	}
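	/*
	 * ac->state tracks the async read: 2 means a read has been
	 * submitted and is still in flight, 3 means it completed
	 * successfully, 4 means it failed (see bi_async_complete above).
	 */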
	if (ac->state == 2)
		return -EAGAIN;
	if (ac->state == 3)
		return 0;
	if (ac->state == 4)
		return -EIO;

	bdev = fs->devs[dev].bdev;
	ac->state = 2; /* loading */
	ac->fs = fs;
	async_page_io(bdev, sect, 0,
		      blocks << fs->blocksize_bits,
		      p, 0, ac);
	return -EAGAIN;
}

static void
bi_write_done(struct bio *bio, int error)
{
	struct fs *fs = bio->bi_private;

	if (atomic_dec_and_test(&fs->sb_writes_pending))
		wake_up(&fs->sb_writes_wait);
	bio_put(bio);
	/* FIXME didn't do anything with error */
}

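/*
 * Superblock and state-block writes can be issued to all devices in
 * parallel: each lafs_super_write() call bumps sb_writes_pending, and
 * lafs_super_wait() sleeps until the count drops back to zero.
 */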
void
lafs_super_write(struct fs *fs, int dev, u64 addr, char *buf, int size)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
	int rw = WRITE | (1 << BIO_RW_UNPLUG);

	bio->bi_bdev = fs->devs[dev].bdev;
	bio->bi_sector = addr;
	bio_add_page(bio, virt_to_page(buf), size, offset_in_page(buf));
	bio->bi_private = fs;
	bio->bi_end_io = bi_write_done;
	atomic_inc(&fs->sb_writes_pending);
	submit_bio(rw, bio);
}

int
lafs_super_wait(struct fs *fs)
{
	wait_event(fs->sb_writes_wait,
		   atomic_read(&fs->sb_writes_pending) == 0
		);
	return 0; /* FIXME should be an error flag */
}

static DECLARE_WAIT_QUEUE_HEAD(block_wait); /* need more of these later FIXME */

void lafs_io_wake(struct block *b)
{
	wake_up(&block_wait);
}

void _lafs_iolock_block(struct block *b)
{
	if (test_and_set_bit(B_IOLock, &b->flags)) {
		DEFINE_WAIT(wq);
#ifdef DEBUG_IOLOCK
		printk("iolock wait for %s:%d: %s\n",
		       b->iolock_file, b->iolock_line,
		       strblk(b));
#endif
		for (;;) {
			prepare_to_wait(&block_wait, &wq, TASK_UNINTERRUPTIBLE);

			if (!test_and_set_bit(B_IOLock, &b->flags))
				break;
			schedule();
		}
		finish_wait(&block_wait, &wq);
	}
}

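/*
 * Async variant of the IO-lock.  Returns 1 with B_IOLock held (dropping
 * any async reference that was taken earlier), or 0 after ensuring
 * B_Async is set and a reference is held, so that whoever unlocks the
 * block will wake the thread to retry.
 */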
int _lafs_iolock_block_async(struct block *b)
{
	for (;;) {
		if (!test_and_set_bit(B_IOLock, &b->flags)) {
			/* just got the lock! */
			if (test_and_clear_bit(B_Async, &b->flags))
				putref(b, MKREF(async));
			return 1;
		}
		if (test_and_set_bit(B_Async, &b->flags))
			/* already have async set */
			return 0;
		getref(b, MKREF(async));
	}
}

void
lafs_iounlock_block(struct block *b)
{
	/* Unlock this block, and if it is the last locked block
	 * for the page, unlock the page too.
	 * This only applies to data blocks.
	 */

	if (test_bit(B_Index, &b->flags))
		clear_bit(B_IOLock, &b->flags);
	else
		lafs_iocheck_block(dblk(b), 1);

	lafs_io_wake(b);
	if (test_bit(B_Async, &b->flags))
		lafs_wake_thread(fs_from_inode(b->inode));
}

void lafs_writeback_done(struct block *b)
{
	/* remove writeback flag on this block.
	 * If it is last on page, release page as well.
	 */

	if (test_bit(B_Index, &b->flags)) {
		clear_bit(B_Writeback, &b->flags);
		lafs_io_wake(b);
		if (test_bit(B_Async, &b->flags))
			lafs_wake_thread(fs_from_inode(b->inode));
	} else
		lafs_iocheck_writeback(dblk(b), 1);
}

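/*
 * All blocks sharing a page are held in an array hanging off
 * page->private.  B_IOLock is cleared for this block under the per-page
 * B_IOLockLock, and only when no block on the page remains locked (and
 * B_HaveLock was set) is the page itself unlocked.
 */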
void lafs_iocheck_block(struct datablock *db, int unlock)
{
	struct page *page = db->page;
	struct datablock *blist;
	int n, i;
	int locked = 0;
	int havelock = 0;

	if (!page)
		return;
	blist = (struct datablock *)page->private;
	if (!blist)
		return;

	n = 1<<(PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
	bit_spin_lock(B_IOLockLock, &blist->b.flags);
	if (unlock)
		clear_bit(B_IOLock, &db->b.flags);
	for (i = 0 ; i < n; i++) {
		if (test_bit(B_IOLock, &blist[i].b.flags))
			locked++;
		/* FIXME what about checking uptodate ?? */
	}
	if (!locked && test_and_clear_bit(B_HaveLock, &blist->b.flags))
		havelock = 1;
	bit_spin_unlock(B_IOLockLock, &blist->b.flags);

	if (havelock) {
		if (!PageError(page))
			SetPageUptodate(page);
		unlock_page(page);
	}
}

void lafs_iocheck_writeback(struct datablock *db, int unlock)
{
	struct page *page = db->page;
	struct datablock *blist;
	int n, i;
	int locked = 0;
	int havewrite = 0;

	if (!page)
		return;
	blist = (struct datablock *)page->private;
	if (!blist)
		return;

	n = 1<<(PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
	bit_spin_lock(B_IOLockLock, &blist->b.flags);
	if (unlock)
		clear_bit(B_Writeback, &db->b.flags);
	for (i = 0 ; i < n; i++) {
		if (test_bit(B_Writeback, &blist[i].b.flags))
			locked++;
		/* FIXME what about checking uptodate ?? */
	}
	if (!locked && test_and_clear_bit(B_HaveWriteback, &blist->b.flags))
		havewrite = 1;
	bit_spin_unlock(B_IOLockLock, &blist->b.flags);

	if (havewrite)
		end_page_writeback(page);
	if (unlock) {
		lafs_io_wake(&db->b);
		if (test_bit(B_Async, &db->b.flags))
			lafs_wake_thread(fs_from_inode(db->b.inode));
	}
}

int __must_check
lafs_wait_block(struct block *b)
{
	if (test_bit(B_IOLock, &b->flags) &&
	    !test_bit(B_Valid, &b->flags)) {
		DEFINE_WAIT(wq);
		for (;;) {
			prepare_to_wait(&block_wait, &wq, TASK_UNINTERRUPTIBLE);
			if (test_bit(B_IOLock, &b->flags) &&
			    !test_bit(B_Valid, &b->flags))
				schedule();
			else
				break;
		}
		finish_wait(&block_wait, &wq);
	}
	return test_bit(B_Valid, &b->flags) ? 0 : -EIO;
}

int __must_check
lafs_wait_block_async(struct block *b)
{
	for (;;) {
		if (!test_bit(B_IOLock, &b->flags) ||
		    test_bit(B_Valid, &b->flags)) {
			if (test_and_clear_bit(B_Async, &b->flags))
				putref(b, MKREF(async));
			if (test_bit(B_Valid, &b->flags))
				return 0;
			else
				return -EIO;
		}
		if (test_and_set_bit(B_Async, &b->flags))
			return -EAGAIN;
		getref(b, MKREF(async));
	}
}

static void wait_writeback(struct block *b)
{
	if (test_bit(B_Writeback, &b->flags)) {
		DEFINE_WAIT(wq);
#ifdef DEBUG_IOLOCK
		printk("writeback wait for %s:%d: %s\n",
		       b->iolock_file, b->iolock_line,
		       strblk(b));
#endif
		lafs_trigger_flush(b);
		for (;;) {
			prepare_to_wait(&block_wait, &wq, TASK_UNINTERRUPTIBLE);
			if (test_bit(B_Writeback, &b->flags))
				schedule();
			else
				break;
		}
		finish_wait(&block_wait, &wq);
	}
}

void _lafs_iolock_written(struct block *b)
{
	_lafs_iolock_block(b);
	wait_writeback(b);
}

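/*
 * Async variant of _lafs_iolock_written().  B_Writeback is tested again
 * after B_IOLock is obtained because a racing thread may have set it in
 * between; in that case the lock is dropped and a flush is triggered
 * before falling back to the B_Async path.
 */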
int _lafs_iolock_written_async(struct block *b)
{
	for (;;) {
		if (!test_bit(B_Writeback, &b->flags) &&
		    !test_and_set_bit(B_IOLock, &b->flags)) {
			if (!test_bit(B_Writeback, &b->flags)) {
				/* Have lock without writeback */
				if (test_and_clear_bit(B_Async, &b->flags))
					putref(b, MKREF(async));
				return 1;
			}
			/* Writeback was set by a racing thread.. */
			lafs_iounlock_block(b);
		}
		lafs_trigger_flush(b);
		if (test_and_set_bit(B_Async, &b->flags))
			return 0;

		getref(b, MKREF(async));
	}
}

static void
block_loaded(struct bio *bio, int error)
{
	struct block *b = bio->bi_private;

	dprintk("loaded %d of %d\n", (int)b->fileaddr, (int)b->inode->i_ino);
	if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		set_bit(B_Valid, &b->flags); /* FIXME should I set
						an error too? */
	} else if (!test_bit(B_Index, &b->flags) && dblk(b)->page) {
		ClearPageUptodate(dblk(b)->page);
		SetPageError(dblk(b)->page);
	} else
		dprintk("Block with no page!!\n");
	lafs_iounlock_block(b);
}

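/*
 * When several blocks are read with one bio (see lafs_load_block()),
 * they are linked through b->chain with the head block kept in
 * bio->bi_private; this walks the chain and completes each block in
 * turn via block_loaded().
 */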
static void
blocks_loaded(struct bio *bio, int error)
{
	struct block *bhead = bio->bi_private;

	while (bhead->chain) {
		struct block *b = bhead->chain;
		bhead->chain = b->chain;
		b->chain = NULL;
		bio->bi_private = b;
		block_loaded(bio, error);
	}
	bio->bi_private = bhead;
	block_loaded(bio, error);
}

int __must_check
lafs_load_block(struct block *b, struct bio *bio)
{
	int dev;
	sector_t sect;
	struct block_device *bdev;
	struct fs *fs = fs_from_inode(b->inode);
	struct page *page;
	struct block *headb;
	int offset;

	if (!test_bit(B_PhysValid, &b->flags))
		b->physaddr = 0;
	if (test_bit(B_Valid, &b->flags))
		return 0;
	lafs_iolock_block(b);
	if (test_bit(B_Valid, &b->flags)) {
		lafs_iounlock_block(b);
		return 0;
	}
	LAFS_BUG(test_bit(B_InoIdx, &b->flags), b);
	if (test_bit(B_Index, &b->flags)) {
		struct indexblock *ib = iblk(b);

		if (b->physaddr == 0) {
			/* An empty index block.  One doesn't see many of
			 * these, as it means we trimmed out some blocks,
			 * but not all following blocks, and a block in
			 * the hole is being looked for.  Just create a
			 * valid, clear index block.
			 */
			lafs_clear_index(ib);
			lafs_iounlock_block(b);
			return 0;
		}

		page = virt_to_page(ib->data);
		offset = offset_in_page(ib->data);
	} else {
		struct datablock *db = dblk(b);
		if (b->physaddr == 0) {
			/* block is either in the inode, or
			 * non-existent (all 'nul').
			 */
			struct lafs_inode *lai = LAFSI(b->inode);
			void *baddr = map_dblock(db);

			/* This case is handled in find_block */
			LAFS_BUG(lai->depth == 0 && b->fileaddr == 0, b);

			memset(baddr, 0, (1<<b->inode->i_blkbits));
			unmap_dblock(db, baddr);
			set_bit(B_Valid, &b->flags);
			lafs_iounlock_block(b);
			return 0;
		}
		page = db->page;
		offset = dblock_offset(db);
	}

	virttophys(fs, b->physaddr, &dev, &sect);

	if (dev < 0) {
		lafs_iounlock_block(b);
		return -EIO;
	}

	bdev = fs->devs[dev].bdev;

	if (!bio) {
		bio = bio_alloc(GFP_NOIO, 1);

		bio->bi_bdev = bdev;
		bio->bi_sector = sect;
		bio_add_page(bio, page, fs->blocksize, offset);

		bio->bi_private = b;
		bio->bi_end_io = block_loaded;
		submit_bio(READ, bio);

		return 0;
	}
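	/*
	 * A caller-supplied bio: if it is still empty it is initialised
	 * for this block; otherwise the block is appended (and chained to
	 * the bio's head block) only when it is on the same device and
	 * physically contiguous with what the bio already holds.
	 * Otherwise -EINVAL is returned and the block is not added.
	 */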
	LAFS_BUG(b->chain != NULL, b);
	if (bio->bi_size == 0) {
		bio->bi_sector = sect;
		bio->bi_bdev = bdev;
		bio_add_page(bio, page, fs->blocksize, offset);
		bio->bi_private = b;
		bio->bi_end_io = blocks_loaded;
		return 0;
	}
	if (bio->bi_sector + (bio->bi_size / 512) != sect
	    || bio->bi_bdev != bdev
	    || bio_add_page(bio, page, fs->blocksize, offset) == 0)
		return -EINVAL;
	/* added the block successfully */
	headb = bio->bi_private;
	b->chain = headb->chain;
	headb->chain = b;
	return 0;
}

int __must_check
lafs_read_block(struct datablock *b)
{
	int rv;

	if (test_bit(B_Valid, &b->b.flags))
		return 0;

	rv = lafs_find_block(b, NOADOPT);
	if (rv)
		return rv;
	rv = lafs_load_block(&b->b, NULL);
	if (rv)
		return rv;
	return lafs_wait_block(&b->b);
}

int __must_check
lafs_read_block_async(struct datablock *b)
{
	int rv;

	if (test_bit(B_Valid, &b->b.flags))
		return 0;

	rv = lafs_find_block_async(b);
	if (rv)
		return rv;
	rv = lafs_load_block(&b->b, NULL);
	if (rv)
		return rv;
	return lafs_wait_block_async(&b->b);
}

/*------------------------------------------------------------------
 * Writing filesystem blocks and cluster headers.
 * The endio function is found from lafs_cluster_endio_choose.
 * We need to increment the pending_cnt for this cluster and,
 * if this is a header block, possibly for earlier clusters.
 *
 * Later we should attempt to combine multiple blocks into one
 * bio ... if we can manage the bi_end_io function properly.
 */

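/*
 * write_block() accumulates consecutive blocks into wc->bio: bio_virt is
 * the next virtual address expected to continue the current bio,
 * bio_which is the pending_cnt slot the bio will be accounted to, and
 * bio_head records whether the bio starts with a cluster head, in which
 * case up to two earlier clusters with VerifyNext/VerifyNext2 also get
 * a pending count when the bio is submitted.
 */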
static void write_block(struct fs *fs, struct page *p, int offset,
			u64 virt, struct wc *wc, int head)
{
	struct bio *bio;
	sector_t uninitialized_var(sect);
	int which = wc->pending_next;
	int dev;
	int nr_vecs;

	virttophys(fs, virt, &dev, &sect);

	bio = wc->bio;
	if (bio && virt == wc->bio_virt &&
	    bio->bi_bdev == fs->devs[dev].bdev &&
	    which == wc->bio_which &&
	    bio_add_page(bio, p, fs->blocksize, offset) > 0) {
		/* Added to the current bio - too easy */
		wc->bio_virt++;
		return;
	}

	if (bio) {
		int w = wc->bio_which;
		/* need to submit the pending bio and add to pending counts */
		atomic_inc(&wc->pending_cnt[w]);
		if (wc->bio_head) {
			w = (w+3) % 4;
			if (wc->pending_vfy_type[w] == VerifyNext ||
			    wc->pending_vfy_type[w] == VerifyNext2)
				atomic_inc(&wc->pending_cnt[w]);
			w = (w+3) % 4;
			if (wc->pending_vfy_type[w] == VerifyNext2)
				atomic_inc(&wc->pending_cnt[w]);
		}
		wc->bio = NULL;
		if (wc->bio_queue && wc->bio_queue != bdev_get_queue(bio->bi_bdev))
			blk_unplug(wc->bio_queue);
		wc->bio_queue = bdev_get_queue(bio->bi_bdev);
		submit_bio(WRITE, bio);
		bio = NULL;
	}
	if (!virt && !head) {
		/* end of cluster */
		if (wc->bio_queue)
			blk_unplug(wc->bio_queue);
		wc->bio_queue = NULL;
		return;
	}
	nr_vecs = 128; /* FIXME */
	while (!bio && nr_vecs) {
		bio = bio_alloc(GFP_NOIO, nr_vecs);
		nr_vecs /= 2;
	}
	wc->bio = bio;
	wc->bio_virt = virt + 1;
	wc->bio_head = head;
	wc->bio_which = which;
	bio->bi_bdev = fs->devs[dev].bdev;
	bio->bi_sector = sect;
	bio_add_page(bio, p, fs->blocksize, offset);

	bio->bi_private = wc;
	bio->bi_end_io = lafs_cluster_endio_choose(which, head);
}

void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt,
		     struct wc *wc)
{
	write_block(fs, virt_to_page(head), offset_in_page(head),
		    virt, wc, 1);
}

void lafs_write_block(struct fs *fs, struct block *b, struct wc *wc)
{
	if (test_bit(B_Index, &b->flags))
		write_block(fs, virt_to_page(iblk(b)->data),
			    offset_in_page(iblk(b)->data),
			    b->physaddr, wc, 0);
	else
		write_block(fs, dblk(b)->page, dblock_offset(dblk(b)),
			    b->physaddr, wc, 0);
}

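/*
 * Flush the write path: a NULL page with virt == 0 and head == 0 makes
 * write_block() submit any partially built bio and unplug the queue,
 * marking the end of the cluster.
 */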
void lafs_write_flush(struct fs *fs, struct wc *wc)
{
	write_block(fs, NULL, 0, 0, wc, 0);
}