
/*
 * IO routines for LaFS
 * fs/lafs/io.c
 * Copyright (C) 2006-2009
 * NeilBrown <neilb@suse.de>
 * Released under the GPL, version 2
 */

/*
 * There are two quite separate sets of routines here.
 * One set is used for reading and writing filesystem blocks.
 * Reading is generally asynchronous, but can be waited for.
 * Writing is sequential into write-clusters.  It is not possible
 * to wait for a particular write, but only to wait for a write-cluster
 * to be safe.
 * The other set is for all other IO such as reading/writing superblocks
 * and stateblocks, and for reading cluster-heads during roll-forward.
 * These reads are always synchronous, while writes allow all devices
 * to be written in parallel.
 */

#include        "lafs.h"
#include        <linux/blkdev.h>
#include        <linux/bit_spinlock.h>

int
lafs_dev_find(struct fs *fs, u64 virt)
{
        int i;
        for (i = 0; i < fs->devices; i++)
                if (virt >= fs->devs[i].start &&
                    virt < fs->devs[i].start + fs->devs[i].size)
                        return i;
        printk("%llu not found:\n", (unsigned long long) virt);
        for (i = 0; i < fs->devices; i++)
                printk(" %d: %llu+%llu\n", i,
                       (unsigned long long)fs->devs[i].start,
                       (unsigned long long)fs->devs[i].size);
        BUG();
        return -1;
}
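
/*
 * For illustration only: virttophys() (defined elsewhere) is expected to
 * combine the device table searched above with the block size, roughly as
 * sketched here.  The exact per-device layout is an assumption, not
 * necessarily what the real helper does:
 *
 *	int dev = lafs_dev_find(fs, virt);
 *	sector_t sect = (virt - fs->devs[dev].start)
 *		<< (fs->blocksize_bits - 9);
 */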

static void bi_complete(struct bio *bio, int error)
{
        complete((struct completion *)bio->bi_private);
}

int
lafs_sync_page_io(struct block_device *bdev, sector_t sector,
                  int offset, int size,
                  struct page *page, int rw)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        struct completion event;
        int ret;

        rw |= REQ_UNPLUG;

        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, offset);
        init_completion(&event);
        bio->bi_private = &event;
        bio->bi_end_io = bi_complete;
        submit_bio(rw, bio);
        wait_for_completion(&event);

        ret = !!test_bit(BIO_UPTODATE, &bio->bi_flags);
        bio_put(bio);
        return ret;
}
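
/*
 * A hedged usage sketch: read one filesystem block synchronously, e.g.
 * while reading a superblock during mount.  'sb_page' and 'sb_sector' are
 * hypothetical names; the function returns 1 on success and 0 on error.
 *
 *	if (!lafs_sync_page_io(fs->devs[0].bdev, sb_sector, 0,
 *			       fs->blocksize, sb_page, READ))
 *		return -EIO;
 */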

int
lafs_load_page(struct fs *fs, struct page *p, u64 vaddr, int blocks)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;

        virttophys(fs, vaddr, &dev, &sect);

        if (dev < 0 || dev >= fs->devs_loaded) {
                dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
                return -EIO;
        }

        bdev = fs->devs[dev].bdev;
        return lafs_sync_page_io(bdev, sect, 0,
                                 blocks << fs->blocksize_bits,
                                 p, 0) ? 0 : -EIO;
}

int
lafs_load_pages(struct fs *fs, struct page *p, u64 vaddr, int blocks)
{
        /* load 1 or more pages which are consecutive in memory
         * from 'p'
         * FIXME make this async - then wait.
         */
        int blocks_per_page = (PAGE_SIZE >> fs->blocksize_bits);
        int rv = 0;

        while (blocks && rv == 0) {
                int b = blocks;
                if (b > blocks_per_page)
                        b = blocks_per_page;
                rv = lafs_load_page(fs, p, vaddr, b);
                blocks -= b;
                vaddr += blocks_per_page;
                p++;
        }
        return rv;
}
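
/*
 * Worked example of the loop above, assuming PAGE_SIZE is 4096 and the
 * block size is 1024: blocks_per_page is 4, so a request for 10 blocks
 * issues three synchronous reads of 4, 4 and 2 blocks into pages p, p+1
 * and p+2, with vaddr advancing by one page worth of blocks each time.
 */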

static void
bi_async_complete(struct bio *bio, int error)
{
        struct async_complete *ac = bio->bi_private;

        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                ac->state = 3;
        else
                ac->state = 4;
        bio_put(bio);
        lafs_wake_thread(ac->fs);
}

static void
async_page_io(struct block_device *bdev, sector_t sector, int offset, int size,
              struct page *page, int rw, struct async_complete *ac)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);

        rw |= REQ_UNPLUG;

        bio->bi_bdev = bdev;
        bio->bi_sector = sector;
        bio_add_page(bio, page, size, offset);
        bio->bi_private = ac;
        bio->bi_end_io = bi_async_complete;
        submit_bio(rw, bio);
}

int
lafs_load_page_async(struct fs *fs, struct page *p, u64 vaddr,
                     int blocks, struct async_complete *ac)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;

        virttophys(fs, vaddr, &dev, &sect);

        if (dev < 0 || dev >= fs->devs_loaded) {
                dprintk("dev %d not in [0..%d)\n", dev, fs->devs_loaded);
                return -EIO;
        }
        if (ac->state == 2)
                return -EAGAIN;
        if (ac->state == 3)
                return 0;
        if (ac->state == 4)
                return -EIO;

        bdev = fs->devs[dev].bdev;
        ac->state = 2; /* loading */
        ac->fs = fs;
        async_page_io(bdev, sect, 0,
                      blocks << fs->blocksize_bits,
                      p, 0, ac);
        return -EAGAIN;
}
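
/*
 * Expected calling pattern (a sketch, from a hypothetical caller running
 * in the filesystem thread): ac->state 2/3/4 mean loading/loaded/failed,
 * and bi_async_complete() wakes the thread when the IO finishes.
 *
 *	switch (lafs_load_page_async(fs, page, vaddr, blocks, &ac)) {
 *	case -EAGAIN:	// submitted, or still in flight - wait for a wakeup
 *		break;
 *	case 0:		// the page is loaded and can be used
 *		break;
 *	case -EIO:	// bad address, or the read failed
 *		break;
 *	}
 */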

static void
bi_write_done(struct bio *bio, int error)
{
        struct fs *fs = bio->bi_private;

        if (atomic_dec_and_test(&fs->sb_writes_pending))
                wake_up(&fs->sb_writes_wait);
        bio_put(bio);
        /* FIXME didn't do anything with error */
}

void
lafs_super_write(struct fs *fs, int dev, u64 addr, char *buf, int size)
{
        struct bio *bio = bio_alloc(GFP_NOIO, 1);
        int rw = WRITE | REQ_UNPLUG;

        bio->bi_bdev = fs->devs[dev].bdev;
        bio->bi_sector = addr;
        bio_add_page(bio, virt_to_page(buf), size, offset_in_page(buf));
        bio->bi_private = fs;
        bio->bi_end_io = bi_write_done;
        atomic_inc(&fs->sb_writes_pending);
        submit_bio(rw, bio);
}

int
lafs_super_wait(struct fs *fs)
{
        wait_event(fs->sb_writes_wait,
                   atomic_read(&fs->sb_writes_pending) == 0
                );
        return 0; /* FIXME should be an error flag */
}
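
/*
 * The intended pattern (a sketch; the per-device address and buffer names
 * are assumptions, not fields defined here): queue one write per device so
 * they proceed in parallel, then wait for the whole batch.
 *
 *	for (i = 0; i < fs->devs_loaded; i++)
 *		lafs_super_write(fs, i, fs->devs[i].stateaddr,
 *				 fs->devs[i].stateblock, fs->blocksize);
 *	lafs_super_wait(fs);
 */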

static int sched(void *flags)
{
        io_schedule();
        return 0;
}

void _lafs_iolock_block(struct block *b)
{
        while (test_and_set_bit(B_IOLock, &b->flags)) {
#ifdef DEBUG_IOLOCK
                printk("iolock wait for %s:%d: %s\n",
                       b->iolock_file, b->iolock_line,
                       strblk(b));
#endif
                wait_on_bit(&b->flags, B_IOLock,
                            sched, TASK_UNINTERRUPTIBLE);
        }
}

int _lafs_iolock_block_async(struct block *b)
{
        for (;;) {
                if (!test_and_set_bit(B_IOLock, &b->flags)) {
                        /* just got the lock! */
                        if (test_and_clear_bit(B_Async, &b->flags))
                                putref(b, MKREF(async));
                        return 1;
                }
                if (test_and_set_bit(B_Async, &b->flags))
                        /* already have async set */
                        return 0;
                getref(b, MKREF(async));
        }
}
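
/*
 * Convention shared by the *_async locking helpers (hypothetical caller
 * shown): a non-zero return means the lock was obtained and the caller may
 * continue; zero means B_Async was set and a reference taken, so the
 * filesystem thread will be woken to retry once the lock is released.
 *
 *	if (!_lafs_iolock_block_async(b))
 *		return;		// retry later, when the thread is woken
 *	// ... b is now IO-locked ...
 */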

void
lafs_iounlock_block(struct block *b)
{
        /* Unlock this block, and if it is the last locked block
         * for the page, unlock the page too.
         * This only applies to data blocks.
         */

        if (test_bit(B_Index, &b->flags))
                clear_bit(B_IOLock, &b->flags);
        else
                lafs_iocheck_block(dblk(b), 1);

        wake_up_bit(&b->flags, B_IOLock);
        if (test_bit(B_Async, &b->flags))
                lafs_wake_thread(fs_from_inode(b->inode));
}

void lafs_writeback_done(struct block *b)
{
        /* Remove the writeback flag on this block.
         * If it is the last on the page, release the page as well.
         */

        if (test_bit(B_Index, &b->flags)) {
                clear_bit(B_Writeback, &b->flags);
                wake_up_bit(&b->flags, B_Writeback);
                if (test_bit(B_Async, &b->flags))
                        lafs_wake_thread(fs_from_inode(b->inode));
        } else
                lafs_iocheck_writeback(dblk(b), 1);
}

void lafs_iocheck_block(struct datablock *db, int unlock)
{
        struct page *page = db->page;
        struct datablock *blist;
        int n, i;
        int locked = 0;
        int havelock = 0;

        if (!page)
                return;
        blist = (struct datablock *)page->private;
        if (!blist)
                return;

        n = 1<<(PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
        bit_spin_lock(B_IOLockLock, &blist->b.flags);
        if (unlock)
                clear_bit(B_IOLock, &db->b.flags);
        for (i = 0; i < n; i++) {
                if (test_bit(B_IOLock, &blist[i].b.flags))
                        locked++;
        }
        if (!locked && test_and_clear_bit(B_HaveLock, &blist->b.flags))
                havelock = 1;
        bit_spin_unlock(B_IOLockLock, &blist->b.flags);

        if (havelock) {
                if (!PageError(page))
                        SetPageUptodate(page);
                unlock_page(page);
        }
}
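
/*
 * Example of the counting above, assuming 4096-byte pages and 1024-byte
 * blocks: the page carries n = 4 datablocks in its ->private list.  Only
 * when none of the four still holds B_IOLock, and B_HaveLock shows the
 * struct page was locked on behalf of block IO, is the page marked
 * uptodate (unless an error was recorded) and unlocked.
 */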

void lafs_iocheck_writeback(struct datablock *db, int unlock)
{
        struct page *page = db->page;
        struct datablock *blist;
        int n, i;
        int locked = 0;
        int havewrite = 0;

        if (!page)
                return;
        blist = (struct datablock *)page->private;
        if (!blist)
                return;

        n = 1<<(PAGE_CACHE_SHIFT - blist->b.inode->i_blkbits);
        bit_spin_lock(B_IOLockLock, &blist->b.flags);
        if (unlock)
                clear_bit(B_Writeback, &db->b.flags);
        for (i = 0; i < n; i++) {
                if (test_bit(B_Writeback, &blist[i].b.flags))
                        locked++;
                /* FIXME what about checking uptodate ?? */
        }
        if (!locked && test_and_clear_bit(B_HaveWriteback, &blist->b.flags))
                havewrite = 1;
        bit_spin_unlock(B_IOLockLock, &blist->b.flags);

        if (havewrite)
                end_page_writeback(page);
        if (unlock) {
                wake_up_bit(&db->b.flags, B_Writeback);
                if (test_bit(B_Async, &db->b.flags))
                        lafs_wake_thread(fs_from_inode(db->b.inode));
        }
}

static int sched_valid(void *flags)
{
        if (test_bit(B_Valid, flags))
                return -EINTR;

        schedule();
        return 0;
}

int __must_check
lafs_wait_block(struct block *b)
{
        if (test_bit(B_IOLock, &b->flags) &&
            !test_bit(B_Valid, &b->flags))
                wait_on_bit(&b->flags, B_IOLock,
                            sched_valid, TASK_UNINTERRUPTIBLE);

        return test_bit(B_Valid, &b->flags) ? 0 : -EIO;
}

int __must_check
lafs_wait_block_async(struct block *b)
{
        for (;;) {
                if (!test_bit(B_IOLock, &b->flags) ||
                    test_bit(B_Valid, &b->flags)) {
                        if (test_and_clear_bit(B_Async, &b->flags))
                                putref(b, MKREF(async));
                        if (test_bit(B_Valid, &b->flags))
                                return 0;
                        else
                                return -EIO;
                }
                if (test_and_set_bit(B_Async, &b->flags))
                        return -EAGAIN;
                getref(b, MKREF(async));
        }
}

static void wait_writeback(struct block *b)
{
        if (test_bit(B_Writeback, &b->flags)) {
#ifdef DEBUG_IOLOCK
                printk("writeback wait for %s:%d: %s\n",
                       b->iolock_file, b->iolock_line,
                       strblk(b));
#endif
                lafs_trigger_flush(b);
                wait_on_bit(&b->flags, B_Writeback,
                            sched, TASK_UNINTERRUPTIBLE);
        }
}

void _lafs_iolock_written(struct block *b)
{
        _lafs_iolock_block(b);
        wait_writeback(b);
}

int _lafs_iolock_written_async(struct block *b)
{
        for (;;) {
                if (!test_bit(B_Writeback, &b->flags) &&
                    !test_and_set_bit(B_IOLock, &b->flags)) {
                        if (!test_bit(B_Writeback, &b->flags)) {
                                /* Have lock without writeback */
                                if (test_and_clear_bit(B_Async, &b->flags))
                                        putref(b, MKREF(async));
                                return 1;
                        }
                        /* Writeback was set by a racing thread. */
                        lafs_iounlock_block(b);
                }
                lafs_trigger_flush(b);
                if (test_and_set_bit(B_Async, &b->flags))
                        return 0;

                getref(b, MKREF(async));
        }
}

static void
block_loaded(struct bio *bio, int error)
{
        struct block *b = bio->bi_private;

        dprintk("loaded %d of %d\n", (int)b->fileaddr, (int)b->inode->i_ino);
        if (test_bit(BIO_UPTODATE, &bio->bi_flags)) {
                set_bit(B_Valid, &b->flags); /* FIXME should I set
                                                an error too? */
        } else if (!test_bit(B_Index, &b->flags) && dblk(b)->page) {
                ClearPageUptodate(dblk(b)->page);
                SetPageError(dblk(b)->page);
        } else
                dprintk("Block with no page!!\n");
        lafs_iounlock_block(b);
}

static void
blocks_loaded(struct bio *bio, int error)
{
        struct block *bhead = bio->bi_private;

        while (bhead->chain) {
                struct block *b = bhead->chain;
                bhead->chain = b->chain;
                b->chain = NULL;
                bio->bi_private = b;
                block_loaded(bio, error);
        }
        bio->bi_private = bhead;
        block_loaded(bio, error);
}

int __must_check
lafs_load_block(struct block *b, struct bio *bio)
{
        int dev;
        sector_t sect;
        struct block_device *bdev;
        struct fs *fs = fs_from_inode(b->inode);
        struct page *page;
        struct block *headb;
        int offset;

        if (!test_bit(B_PhysValid, &b->flags))
                b->physaddr = 0;
        if (test_bit(B_Valid, &b->flags))
                return 0;
        lafs_iolock_block(b);
        if (test_bit(B_Valid, &b->flags)) {
                lafs_iounlock_block(b);
                return 0;
        }
        LAFS_BUG(test_bit(B_InoIdx, &b->flags), b);
        if (test_bit(B_Index, &b->flags)) {
                struct indexblock *ib = iblk(b);

                if (b->physaddr == 0) {
                        /* An empty index block.  These are rare: one
                         * appears when we trimmed out some blocks, but
                         * not all the following blocks, and a block in
                         * the hole is now being looked for.  Just create
                         * a valid, clear index block.
                         */
                        lafs_clear_index(ib);
                        lafs_iounlock_block(b);
                        return 0;
                }

                page = virt_to_page(ib->data);
                offset = offset_in_page(ib->data);
        } else {
                struct datablock *db = dblk(b);
                if (b->physaddr == 0) {
                        /* block is either in the inode, or
                         * non-existent (all 'nul').
                         */
                        struct lafs_inode *lai = LAFSI(b->inode);
                        void *baddr = map_dblock(db);

                        /* This case is handled in find_block */
                        LAFS_BUG(lai->depth == 0 && b->fileaddr == 0, b);

                        memset(baddr, 0, (1<<b->inode->i_blkbits));
                        unmap_dblock(db, baddr);
                        set_bit(B_Valid, &b->flags);
                        lafs_iounlock_block(b);
                        return 0;
                }
                page = db->page;
                offset = dblock_offset(db);
        }

        virttophys(fs, b->physaddr, &dev, &sect);

        if (dev < 0) {
                lafs_iounlock_block(b);
                return -EIO;
        }

        bdev = fs->devs[dev].bdev;

        if (!bio) {
                bio = bio_alloc(GFP_NOIO, 1);

                bio->bi_bdev = bdev;
                bio->bi_sector = sect;
                bio_add_page(bio, page, fs->blocksize, offset);

                bio->bi_private = b;
                bio->bi_end_io = block_loaded;
                submit_bio(READ, bio);

                return 0;
        }
        LAFS_BUG(b->chain != NULL, b);
        if (bio->bi_size == 0) {
                bio->bi_sector = sect;
                bio->bi_bdev = bdev;
                bio_add_page(bio, page, fs->blocksize, offset);
                bio->bi_private = b;
                bio->bi_end_io = blocks_loaded;
                return 0;
        }
        if (bio->bi_sector + (bio->bi_size / 512) != sect
            || bio->bi_bdev != bdev
            || bio_add_page(bio, page, fs->blocksize, offset) == 0)
                return -EINVAL;
        /* added the block successfully */
        headb = bio->bi_private;
        b->chain = headb->chain;
        headb->chain = b;
        return 0;
}
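
/*
 * Sketch of the batched read path (the caller shown is hypothetical; the
 * real callers live elsewhere): passing the same pre-allocated bio in
 * repeatedly lets consecutive blocks share it.  Each extra block is chained
 * behind the first via b->chain, and blocks_loaded() completes the whole
 * chain.  A return of -EINVAL means this block did not fit (different
 * device, non-contiguous sector, or a full bio), so the caller should
 * submit the bio it has and start again with a fresh one.
 */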

int __must_check
lafs_read_block(struct datablock *b)
{
        int rv;

        if (test_bit(B_Valid, &b->b.flags))
                return 0;

        rv = lafs_find_block(b, NOADOPT);
        if (rv)
                return rv;
        rv = lafs_load_block(&b->b, NULL);
        if (rv)
                return rv;
        return lafs_wait_block(&b->b);
}

int __must_check
lafs_read_block_async(struct datablock *b)
{
        int rv;

        if (test_bit(B_Valid, &b->b.flags))
                return 0;

        rv = lafs_find_block_async(b);
        if (rv)
                return rv;
        rv = lafs_load_block(&b->b, NULL);
        if (rv)
                return rv;
        return lafs_wait_block_async(&b->b);
}
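
/*
 * Reading a block is thus a three-step pipeline: map the file address to a
 * physical address (lafs_find_block), submit the read under B_IOLock
 * (lafs_load_block), then wait for the lock to drop with B_Valid set
 * (lafs_wait_block).  A hedged sketch of the async form as a hypothetical
 * caller might drive it:
 *
 *	switch (lafs_read_block_async(db)) {
 *	case 0:		// data is valid, use it
 *		break;
 *	case -EAGAIN:	// B_Async is set; retry when the thread is woken
 *		return;
 *	default:	// lookup or IO error
 *		break;
 *	}
 */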

/*------------------------------------------------------------------
 * Writing filesystem blocks and cluster headers.
 * The endio function is found from lafs_cluster_endio_choose.
 * We need to increment the pending_cnt for this cluster and,
 * if this is a header block, possibly for earlier clusters.
 *
 * Later we should attempt to combine multiple blocks into one
 * bio ... if we can manage the bi_end_io function properly.
 */

static void write_block(struct fs *fs, struct page *p, int offset,
                        u64 virt, struct wc *wc, int head)
{
        struct bio *bio;
        sector_t uninitialized_var(sect);
        int which = wc->pending_next;
        int dev;
        int nr_vecs;

        virttophys(fs, virt, &dev, &sect);

        bio = wc->bio;
        if (bio && virt == wc->bio_virt &&
            bio->bi_bdev == fs->devs[dev].bdev &&
            which == wc->bio_which &&
            bio_add_page(bio, p, fs->blocksize, offset) > 0) {
                /* Added to the current bio - too easy */
                wc->bio_virt++;
                return;
        }

        if (bio) {
                int w = wc->bio_which;
                /* need to submit the pending bio and add to pending counts */
                atomic_inc(&wc->pending_cnt[w]);
                if (wc->bio_head) {
                        w = (w+3) % 4;
                        if (wc->pending_vfy_type[w] == VerifyNext ||
                            wc->pending_vfy_type[w] == VerifyNext2)
                                atomic_inc(&wc->pending_cnt[w]);
                        w = (w+3) % 4;
                        if (wc->pending_vfy_type[w] == VerifyNext2)
                                atomic_inc(&wc->pending_cnt[w]);
                }
                wc->bio = NULL;
                if (wc->bio_queue && wc->bio_queue != bdev_get_queue(bio->bi_bdev))
                        blk_unplug(wc->bio_queue);
                wc->bio_queue = bdev_get_queue(bio->bi_bdev);
                submit_bio(WRITE, bio);
                bio = NULL;
        }
        if (!virt && !head) {
                /* end of cluster */
                if (wc->bio_queue)
                        blk_unplug(wc->bio_queue);
                wc->bio_queue = NULL;
                return;
        }
        nr_vecs = 128; /* FIXME */
        while (!bio && nr_vecs) {
                bio = bio_alloc(GFP_NOIO, nr_vecs);
                nr_vecs /= 2;
        }
        wc->bio = bio;
        wc->bio_virt = virt + 1;
        wc->bio_head = head;
        wc->bio_which = which;
        bio->bi_bdev = fs->devs[dev].bdev;
        bio->bi_sector = sect;
        bio_add_page(bio, p, fs->blocksize, offset);

        bio->bi_private = wc;
        bio->bi_end_io = lafs_cluster_endio_choose(which, head);
}

void lafs_write_head(struct fs *fs, struct cluster_head *head, u64 virt,
                     struct wc *wc)
{
        write_block(fs, virt_to_page(head), offset_in_page(head),
                    virt, wc, 1);
}

void lafs_write_block(struct fs *fs, struct block *b, struct wc *wc)
{
        if (test_bit(B_Index, &b->flags))
                write_block(fs, virt_to_page(iblk(b)->data),
                            offset_in_page(iblk(b)->data),
                            b->physaddr, wc, 0);
        else
                write_block(fs, dblk(b)->page, dblock_offset(dblk(b)),
                            b->physaddr, wc, 0);
}

void lafs_write_flush(struct fs *fs, struct wc *wc)
{
        write_block(fs, NULL, 0, 0, wc, 0);
}
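
/*
 * Putting the write-side pieces together, flushing one cluster presumably
 * looks roughly like the sketch below (the loop over blocks is
 * hypothetical; the real cluster-writing code lives elsewhere):
 *
 *	lafs_write_head(fs, head, head_virt, wc);
 *	for (each block b allocated to this cluster)
 *		lafs_write_block(fs, b, wc);
 *	lafs_write_flush(fs, wc);	// submit the final bio and unplug
 */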