/*
 * Copyright (C) 2005-2009
 * Neil Brown <neilb@suse.de>
 * Released under the GPL, version 2
 */
#include <linux/bit_spinlock.h>
#include <linux/writeback.h>
#include <linux/version.h>
#include "lafs.h"
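
/*
 * Read path: readpage_filler() is called once per page, either directly
 * from lafs_readpage() or via read_cache_pages() from lafs_readpages(),
 * and batches every block that needs I/O into a single shared bio.
 */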
struct readpage_data {
	struct bio *bio;	/* bio being assembled; allocated on demand */
	int pages_remaining;	/* pages still to be passed to the filler */
};
static int
readpage_filler(void *data, struct page *page)
{
	struct inode *ino = page->mapping->host;
	struct readpage_data *rpd = data;
	struct datablock *b0;
	int n = 1 << (PAGE_SHIFT - ino->i_blkbits);
	int i;
	int err = 0;

	rpd->pages_remaining--;
	b0 = lafs_get_block(ino, 0, page, GFP_KERNEL,
			    MKREF(readpage0));
	LAFS_BUG(test_bit(B_HaveLock, &b0->b.flags), &b0->b);
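	/*
	 * b0 keeps an extra reference on the first block of the page.
	 * B_HaveLock is set on it below, once every block has been
	 * examined, so the page is not unlocked until all the reads
	 * queued here have completed.
	 */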
	dprintk("read page %p for %d blocks\n", page, n);
	for (i = 0; i < n; i++) {
		struct datablock *b = lafs_get_block(ino, i, page, GFP_KERNEL,
						     MKREF(readpage));

		err = lafs_find_block(b, NOADOPT);
		if (err) {
			putdref(b, MKREF(readpage));
			break;
		}
		if (test_bit(B_Valid, &b->b.flags)) {
			putdref(b, MKREF(readpage));
			continue;
		}
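		/*
		 * All blocks that need reading share one bio, sized for
		 * this page plus every page still to come
		 * (pages_remaining has already been decremented).
		 */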
		if (!rpd->bio)
			rpd->bio = bio_alloc(GFP_NOFS,
					     rpd->pages_remaining + 1);
		err = lafs_load_block(&b->b, rpd->bio);
		if (err) {
			submit_bio(READ, rpd->bio);
			rpd->bio = NULL;
		}
		putdref(b, MKREF(readpage));
	}
	set_bit(B_HaveLock, &b0->b.flags);
	lafs_iocheck_block(b0, 0);
	putdref(b0, MKREF(readpage0));
	return err;
}

static int
lafs_readpage(struct file *file, struct page *page)
{
	/* Page is locked.  We want to read it all in
	 * and then unlock the page.
	 * For this we get all the blocks and load them.
	 */
	struct readpage_data rpd;
	int err;

	rpd.bio = NULL;
	rpd.pages_remaining = 1;
	err = readpage_filler(&rpd, page);
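	/* Submit any reads the filler batched into a bio; completion of
	 * the final block read will unlock the page.
	 */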
	if (rpd.bio)
		submit_bio(READ, rpd.bio);
	return err;
}

static int
lafs_readpages(struct file *file, struct address_space *mapping,
	       struct list_head *pages, unsigned nr_pages)
{
	struct readpage_data rpd;
	int err;

	rpd.bio = NULL;
	rpd.pages_remaining = nr_pages;
	err = read_cache_pages(mapping, pages, readpage_filler, &rpd);
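	/* read_cache_pages() adds each page to the page cache and calls
	 * readpage_filler() on it, so all nr_pages pages can share the
	 * one bio that is allocated on first use.
	 */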
	if (rpd.bio && rpd.bio->bi_size)
		submit_bio(READ, rpd.bio);
	return err;
}

static int
lafs_write_begin(struct file *file, struct address_space *mapping,
		 loff_t pos, unsigned len, unsigned flags,
		 struct page **pagep, void **fsdata)
{
	/* The range covers one or more blocks in this page
	 * and may partially cover the first or last.
	 * If there is partial coverage, we need to read in
	 * those partially-covered blocks first.
	 * Then we 'reserve' those datablocks to ensure that
	 * a write will succeed.
	 */
	struct inode *ino = mapping->host;
	struct fs *fs = fs_from_inode(ino);
	int bits = ino->i_blkbits;
	int first, last, loading = -1;
	struct datablock *fb;
	struct page *page;
	pgoff_t index;
	unsigned from, to;
	int i;
	int err;
	DEFINE_WAIT(wq);

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + len;
	first = from >> bits;
	last = (to - 1) >> bits;
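	/* first/last are page-relative block numbers.  For example, with
	 * 4K pages and 1K blocks (bits == 10), pos == 5120 and len == 512
	 * give from == 1024 and to == 1536, so the write touches only
	 * block 1 of page 1: first == last == 1.
	 */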

	while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
	       LAFSI(ino)->trunc_next <= last) {
		struct datablock *db = lafs_inode_dblock(ino,
							 SYNC, MKREF(writetrunc));
		prepare_to_wait(&fs->async_complete, &wq,
				TASK_UNINTERRUPTIBLE);
		lafs_inode_handle_orphan(db);
		if (test_bit(B_Orphan, &db->b.flags))
			schedule();
		putdref(db, MKREF(writetrunc));
	}
	finish_wait(&fs->async_complete, &wq);
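	/* Any truncation that overlapped the blocks we are about to write
	 * has now progressed past them, so the write cannot race with
	 * orphan cleanup.
	 */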

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29)
	page = __grab_cache_page(mapping, index);
#else
	page = grab_cache_page_write_begin(mapping, index, flags);
#endif
	if (!page)
		return -ENOMEM;
	*pagep = page;

	fb = lafs_get_block(ino, first, page, GFP_KERNEL, MKREF(write));
	dprintk("PREPARE %p\n", fb);
	/* Further lafs_get_block calls cannot fail as both the page
	 * and the block structures exist.
	 */
	for (i = first + 1; i <= last; i++)
		lafs_get_block(ino, i, page, GFP_KERNEL, MKREF(write));
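
	/* Blocks only partially covered by the write must be read in
	 * before they are modified; 'loading' records that a read of the
	 * first block is in flight so it can be waited for below.
	 */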
	if (from != (first << bits)) {
		err = lafs_find_block(fb, NOADOPT);
		if (err == 0)
			err = lafs_load_block(&fb->b, NULL);
		loading = first;
	}
	if (last != loading &&
	    to != ((last + 1) << bits)) {
		struct datablock *lb = fb + last - first;

		err = lafs_find_block(lb, ADOPT);
		if (err == 0)
			err = lafs_load_block(&lb->b, NULL);
		if (err == 0)
			err = lafs_wait_block(&fb[last - first].b);
	}
	if (loading == first)
		err = lafs_wait_block(&fb->b);

	lafs_checkpoint_lock(fs);
	for (i = first; err == 0 && i <= last; i++) {
		/* FIXME need PinPending or something to make sure
		 * credits don't disappear */
		err = lafs_reserve_block(&fb[i - first].b, NewSpace);
		LAFS_BUG(fb[i - first].b.parent == NULL, &fb[i - first].b);
	}
	if (err == -EAGAIN) {
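		/* -EAGAIN from lafs_reserve_block() means there is no room
		 * in the current checkpoint; wait for it to complete so a
		 * retry can reserve against the next one.
		 */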
		lafs_checkpoint_unlock_wait(fs);
	}
	lafs_checkpoint_unlock(fs);
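
	/* On failure, drop the reference taken on each block above and
	 * release the page before returning the error.
	 */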
	for (i = first; i <= last; i++)
		putdref(&fb[i - first], MKREF(write));
	unlock_page(page);
	page_cache_release(page);
	return err;
}

static int
lafs_write_end(struct file *file,
	       struct address_space *mapping,
	       loff_t pos, unsigned len, unsigned copied,
	       struct page *page, void *fsdata)
{
	struct inode *ino = mapping->host;
	struct fs *fs = fs_from_inode(ino);
	int bits = ino->i_blkbits;
	unsigned first, last;
	struct datablock *fb;
	pgoff_t index;
	unsigned from, to;
	int i;

	index = pos >> PAGE_CACHE_SHIFT;
	from = pos & (PAGE_CACHE_SIZE - 1);
	to = from + copied;
	first = from >> bits;
	last = (to - 1) >> bits;
	fb = (struct datablock *)page->private + first;

	if (unlikely(copied < len)) {
		if (!PageUptodate(page))
			copied = 0;
	}

	/*
	 * No need to use i_size_read() here: the i_size
	 * cannot change under us because we hold i_mutex.
	 */
	if (pos + len > ino->i_size) {
		i_size_write(ino, pos + len);
		/* Note that we deliberately don't call
		 * mark_inode_dirty(ino);
		 * The inode will automatically be written
		 * when the data blocks have been,
		 * and there is no rush before that.
		 * Just set I_Dirty and allow
		 * __set_page_dirty_nobuffers to do the rest.
		 */
		set_bit(I_Dirty, &LAFSI(ino)->iflags);
	}
	__set_page_dirty_nobuffers(page);
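
	/* Mark every block covered by the copy valid and dirty, and drop
	 * the per-block references taken in lafs_write_begin().
	 */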
	for (i = first; i <= last; i++) {
		set_bit(B_Valid, &(fb + i - first)->b.flags);
		lafs_dirty_dblock(fb + i - first);
		putdref(fb + i - first, MKREF(write));
	}
	lafs_checkpoint_unlock(fs);
	unlock_page(page);
	page_cache_release(page);
	return copied;
}

static int
lafs_writepage(struct page *page, struct writeback_control *wbc)
{
	struct inode *ino = page->mapping->host;
	struct datablock *b0 = NULL;
	int blocks = PAGE_SIZE >> ino->i_blkbits;
	int i;

	dprintk("WRITEPAGE %lu/%lu/%lu\n", LAFSI(ino)->filesys->i_ino,
		ino->i_ino, page->index * blocks);
	for (i = 0; i < blocks; i++) {
		struct datablock *b = lafs_get_block(ino, i, page, GFP_KERNEL,
						     MKREF(writepage));

		if (!b0)
			b0 = getdref(b, MKREF(writepage0));

		/* We need to check PinPending, otherwise we might be called
		 * to flush out a page that is currently part of a transaction.
		 * Need to be careful with inodes too.
		 */
		if (test_bit(B_Dirty, &b->b.flags)) {
			struct inode *myi = NULL;

			if (test_bit(B_PinPending, &b->b.flags))
				;	/* pinned - leave it alone */
			else if ((myi = rcu_my_inode(b)) != NULL &&
				 LAFSI(myi)->iblock)
				;	/* inode block - leave it alone */
			else {
				rcu_iput(myi);
				myi = NULL;
				lafs_iolock_written(&b->b);
				/* block might have been invalidated,
				 * or Pinned, while we waited */
				if (!test_bit(B_Dirty, &b->b.flags))
					lafs_iounlock_block(&b->b);
				else if (test_bit(B_PinPending, &b->b.flags) ||
					 ((myi = rcu_my_inode(b)) != NULL &&
					  LAFSI(myi)->iblock)) {
					lafs_iounlock_block(&b->b);
				} else {
					rcu_iput(myi);
					myi = NULL;
					lafs_cluster_allocate(&b->b, 0);
				}
			}
			rcu_iput(myi);
		}
		putdref(b, MKREF(writepage));
	}

	redirty_page_for_writepage(wbc, page);
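
	/* Writeback mirrors the read side: B_HaveWriteback on the first
	 * block keeps the page under writeback until every block's
	 * cluster write has completed.
	 */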
	set_page_writeback(page);
	set_bit(B_HaveWriteback, &b0->b.flags);
	lafs_iocheck_writeback(b0, 0);
	putdref(b0, MKREF(writepage0));

	if (LAFSI(ino)->depth == 0) {
		/* We really want the data to be safe soon, not just
		 * the page to be clean.  And the data is in the inode.
		 * So write the inode.
		 */
		struct datablock *b = lafs_inode_dblock(ino, SYNC,
							MKREF(writepageflush));

		if (test_bit(B_Dirty, &b->b.flags)) {
			lafs_iolock_written(&b->b);
			/* block might have been invalidated while we waited */
			if (test_bit(B_Dirty, &b->b.flags))
				lafs_cluster_allocate(&b->b, 0);
			else
				lafs_iounlock_block(&b->b);
		}
		putdref(b, MKREF(writepageflush));
	}
	return 0;
}

static void lafs_sync_page(struct page *page)
{
	/* If any block is dirty, flush the cluster so that
	 * writeback completes.
	 */
	struct address_space *mapping;
	struct inode *ino;
	struct fs *fs;
	int bits;
	int i;
	int want_flush = 0;

	if (!PageWriteback(page))
		/* Presumably page is locked - nothing to do.
		 * FIXME should unplug the queue if page
		 * is not up to date, and has blocks.
		 */
		return;

	mapping = page->mapping;
	if (!mapping)
		return;
	ino = mapping->host;
	fs = fs_from_inode(ino);
	bits = PAGE_SHIFT - ino->i_blkbits;

	spin_lock(&mapping->private_lock);
	if (PagePrivate(page)) {
		struct datablock *bl = (struct datablock *)page->private;

		for (i = 0; i < (1 << bits); i++) {
			/* If this block is still dirty though the page is in
			 * writeback, the block must be in the current cluster,
			 * so that cluster needs to be flushed.
			 */
			if (test_bit(B_Dirty, &bl[i].b.flags)) {
				want_flush = 2;
				break;
			}
			if (test_bit(B_Writeback, &bl[i].b.flags))
				want_flush = 1;
		}
	}
	spin_unlock(&mapping->private_lock);

	if (want_flush == 2)
		set_bit(FlushNeeded, &fs->fsstate);
	else if (want_flush == 1)
		set_bit(SecondFlushNeeded, &fs->fsstate);
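
	/* The flush flags are acted on by the lafs cluster thread, which
	 * writes the relevant cluster out so that writeback can complete.
	 */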
	lafs_wake_thread(fs);
}

static int
lafs_sync_file(struct file *file, int datasync)
{
	struct inode *ino = file->f_dentry->d_inode;
	int err = lafs_sync_inode(ino, 0);

	/* FIXME I ignore datasync, just like file_fsync */
	err = write_inode_now(ino, 0) ?: err;
	if (err == 0)
		lafs_sync_inode(ino, 1);
	return err;
}

void lafs_fillattr(struct inode *ino, struct kstat *stat)
{
	/* This makes sure the reported 'atime' is a time that
	 * we can store and return after a clean restart.
	 */
	generic_fillattr(ino, stat);
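
	/* generic_fillattr() reported the in-core atime; replace it with
	 * the access time that LAFS actually persists.
	 */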
	if (!test_bit(I_AccessTime, &LAFSI(ino)->iflags))
		return;

	stat->atime = LAFSI(ino)->md.file.i_accesstime;
	lafs_add_atime_offset(&stat->atime, LAFSI(ino)->md.file.atime_offset);
}

int lafs_getattr(struct vfsmount *mnt, struct dentry *dentry,
		 struct kstat *stat)
{
	lafs_fillattr(dentry->d_inode, stat);
	return 0;
}

const struct file_operations lafs_file_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
	/* .ioctl	= lafs__ioctl, */
	.mmap		= generic_file_mmap,
	.open		= generic_file_open,
	/* .release	= lafs__release_file, */
	.fsync		= lafs_sync_file,
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};

const struct inode_operations lafs_file_ino_operations = {
	.setattr	= lafs_setattr,
	.getattr	= lafs_getattr,
};

const struct address_space_operations lafs_file_aops = {
	.readpage	= lafs_readpage,
	.readpages	= lafs_readpages,
	.writepage	= lafs_writepage,
	.write_begin	= lafs_write_begin,
	.write_end	= lafs_write_end,
	.invalidatepage	= lafs_invalidate_page,
	.releasepage	= lafs_release_page,
	.sync_page	= lafs_sync_page,
};