
/*
 * fs/lafs/file.c
 * Copyright (C) 2005-2009
 * Neil Brown <neilb@suse.de>
 * Released under the GPL, version 2
 *
 * File operations
 */

#include        "lafs.h"
#include        <linux/bit_spinlock.h>
#include        <linux/writeback.h>
#include        <linux/version.h>

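/* Shared state for readpage/readpages: one bio is built up across
 * calls to readpage_filler, and pages_remaining lets each allocation
 * size the bio to cover all pages still to be read.
 */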
struct readpage_data {
        struct bio *bio;
        int pages_remaining;
};

static int
readpage_filler(void *data, struct page *page)
{
        struct inode *ino = page->mapping->host;
        struct readpage_data *rpd = data;
        struct datablock *b0;
        int n = 1 << (PAGE_SHIFT - ino->i_blkbits);
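        /* e.g. with 4096-byte pages (PAGE_SHIFT == 12) and 1024-byte
         * blocks (i_blkbits == 10), n == 4 blocks per page.
         */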
        int i;
        int err = 0;

        rpd->pages_remaining--;
        b0 = lafs_get_block(ino, 0, page, GFP_KERNEL,
                            MKREF(readpage0));

        if (!b0)
                return -ENOMEM;

        LAFS_BUG(test_bit(B_HaveLock, &b0->b.flags), &b0->b);

        dprintk("read page %p for %d blocks\n", page, n);
        for (i = 0; i < n; i++) {
                struct datablock *b = lafs_get_block(ino, i, page, GFP_KERNEL,
                                                     MKREF(readpage));
                BUG_ON(!b);

                err = lafs_find_block(b, NOADOPT);
                if (err) {
                        putdref(b, MKREF(readpage));
                        break;
                }
                if (test_bit(B_Valid, &b->b.flags)) {
                        putdref(b, MKREF(readpage));
                        continue;
                }
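                /* lafs_load_block adds the block to the current bio;
                 * -EINVAL appears to mean the block cannot join this
                 * bio (full, or not contiguous), so submit what we
                 * have and retry with a fresh one.
                 */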
        retry:
                if (rpd->bio == NULL)
                        rpd->bio = bio_alloc(GFP_NOFS, rpd->pages_remaining + 1);
                err = lafs_load_block(&b->b, rpd->bio);
                if (err == -EINVAL) {
                        submit_bio(READ, rpd->bio);
                        rpd->bio = NULL;
                        goto retry;
                }
                putdref(b, MKREF(readpage));
                if (err)
                        break;
        }
        set_bit(B_HaveLock, &b0->b.flags);
        lafs_iocheck_block(b0, 0);
        putdref(b0, MKREF(readpage0));
        return err;
}

static int
lafs_readpage(struct file *file, struct page *page)
{
        /* The page is locked.  We want to read it all in
         * and then unlock it.
         * For this we get all the blocks and load them.
         */
        int err = 0;
        struct readpage_data rpd;

        rpd.bio = NULL;
        rpd.pages_remaining = 1;
        err = readpage_filler(&rpd, page);

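        /* Submit whatever the filler accumulated in the shared bio;
         * if nothing needed loading the bio is empty and is simply
         * released.
         */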
        if (rpd.bio) {
                if (rpd.bio->bi_size)
                        submit_bio(READ, rpd.bio);
                else
                        bio_put(rpd.bio);
        }
        return err;
}

static int
lafs_readpages(struct file *file, struct address_space *mapping,
               struct list_head *pages, unsigned nr_pages)
{
        int err = 0;
        struct readpage_data rpd;

        rpd.bio = NULL;
        rpd.pages_remaining = nr_pages;
        err = read_cache_pages(mapping, pages, readpage_filler, &rpd);

        if (rpd.bio) {
                if (rpd.bio->bi_size)
                        submit_bio(READ, rpd.bio);
                else
                        bio_put(rpd.bio);
        }
        return err;
}

static int
lafs_write_begin(struct file *file, struct address_space *mapping,
                 loff_t pos, unsigned len, unsigned flags,
                 struct page **pagep, void **fsdata)
{
        /* The range covers one or more blocks in this page
         * and may partially cover the first or last.
         * If there is partial coverage, we need to read in
         * those blocks.
         * Then we 'reserve' those datablocks to ensure that
         * a write will succeed.
         */
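        /* Worked example (hypothetical numbers): with 4K pages, 1K
         * blocks, pos == 1536 and len == 2048, we get from == 1536,
         * to == 3584, first == 1, last == 3.  Blocks 1 and 3 are only
         * partially covered, so both must be read in first, while
         * block 2 is fully overwritten and need not be.
         */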
        struct inode *ino = mapping->host;
        struct fs *fs = fs_from_inode(ino);
        int bits = ino->i_blkbits;
        int first, last, loading = -1;
        struct datablock *fb;
        int err = 0;
        int i;
        pgoff_t index;
        unsigned from, to;
        struct page *page;
        DEFINE_WAIT(wq);

        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;

        first = from >> bits;
        last = (to-1) >> bits;

        while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
               LAFSI(ino)->trunc_next <= last) {
                struct datablock *db = lafs_inode_dblock(ino,
                                                         SYNC, MKREF(writetrunc));
                prepare_to_wait(&fs->async_complete, &wq,
                                TASK_UNINTERRUPTIBLE);
                lafs_inode_handle_orphan(db);
                if (test_bit(B_Orphan, &db->b.flags))
                        schedule();
                putdref(db, MKREF(writetrunc));
        }
        finish_wait(&fs->async_complete, &wq);

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29)
        page = __grab_cache_page(mapping, index);
#else
        page = grab_cache_page_write_begin(mapping, index, flags);
#endif
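        /* (grab_cache_page_write_begin replaced __grab_cache_page in
         * 2.6.29, hence the version check above.)
         */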
        if (!page)
                return -ENOMEM;
        *pagep = page;

        fb = lafs_get_block(ino, first, page, GFP_KERNEL, MKREF(write));
        dprintk("PREPARE %p\n", fb);
        if (!fb) {
                err = -ENOMEM;
                goto fail;
        }
        /* Further lafs_get_block calls cannot fail, as both the page
         * and the block structures now exist.
         */
        for (i = first + 1 ; i <= last ; i++)
                lafs_get_block(ino, i, page, GFP_KERNEL, MKREF(write));

        if (from != (first << bits)) {
                err = lafs_find_block(fb, NOADOPT);
                if (!err)
                        err = lafs_load_block(&fb->b, NULL);
                if (err)
                        goto fail;
                loading = first;
        }
        if (last != loading &&
            to != ((last+1) << bits)) {
                struct datablock *lb = fb + last - first;

                err = lafs_find_block(lb, ADOPT);
                if (!err)
                        err = lafs_load_block(&lb->b, NULL);
                if (err)
                        goto fail;

                err = lafs_wait_block(&lb->b);
                if (err)
                        goto fail;
        }
        if (loading == first)
                err = lafs_wait_block(&fb->b);
        if (err)
                goto fail;
retry:
        lafs_checkpoint_lock(fs);
        for (i = first ; err == 0 && i <= last ; i++) {
                /* FIXME need PinPending or something to make sure
                 * credits don't disappear */
                err = lafs_reserve_block(&fb[i - first].b, NewSpace);
                LAFS_BUG(fb[i-first].b.parent == NULL, &fb[i-first].b);
        }
        if (err == -EAGAIN) {
                lafs_checkpoint_unlock_wait(fs);
                err = 0;
                goto retry;
        }
        if (err < 0)
                goto fail_unlock;
        *fsdata = fb;
        return 0;

fail_unlock:
        lafs_checkpoint_unlock(fs);
fail:
        if (fb)
                for (i = first; i <= last ; i++)
                        putdref(&fb[i-first], MKREF(write));
        unlock_page(page);
        page_cache_release(page);
        return err;
}

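/* Complete a write: update i_size if the file grew, mark the written
 * blocks valid and dirty, and drop the references and checkpoint lock
 * taken in lafs_write_begin.
 */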
static int
lafs_write_end(struct file *file,
               struct address_space *mapping,
               loff_t pos, unsigned len, unsigned copied,
               struct page *page, void *fsdata)
{
        struct inode *ino = mapping->host;
        struct fs *fs = fs_from_inode(ino);
        int bits = ino->i_blkbits;
        unsigned first, last;
        struct datablock *fb;
        unsigned i;
        pgoff_t index;
        unsigned from, to;

        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;

        first = from >> bits;
        last = (to-1) >> bits;

        fb = fsdata;

        if (unlikely(copied < len)) {
                if (!PageUptodate(page))
                        copied = 0;
        }

        /*
         * No need to use i_size_read() here: i_size cannot change
         * under us because we hold i_mutex.
         */
        if (pos + copied > ino->i_size) {
                i_size_write(ino, pos + copied);
                /* Note that we deliberately don't call
                 * mark_inode_dirty(ino);
                 * the inode will automatically be written
                 * when the data blocks have been, and there
                 * is no rush before that.
                 * Just set I_Dirty and allow
                 * __set_page_dirty_nobuffers to do the rest.
                 */
                set_bit(I_Dirty, &LAFSI(ino)->iflags);
        }
        __set_page_dirty_nobuffers(page);
        for (i = first ; i <= last ; i++) {
                if (copied > 0) {
                        set_bit(B_Valid, &(fb+i-first)->b.flags);
                        lafs_dirty_dblock(fb+i-first);
                }
                putdref(fb+i-first, MKREF(write));
        }
        lafs_checkpoint_unlock(fs);
        unlock_page(page);
        page_cache_release(page);
        return copied;
}

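/* Write out any dirty blocks in this page by adding them to the
 * current write cluster.  Blocks that are pinned (still part of an
 * uncommitted transaction) cannot be written yet, so the page is
 * redirtied instead.
 */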
static int
lafs_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *ino = page->mapping->host;
        struct datablock *b0 = NULL;
        int blocks = PAGE_SIZE >> ino->i_blkbits;
        int i;
        int redirty = 0;

        dprintk("WRITEPAGE %lu/%lu/%lu\n", LAFSI(ino)->filesys->i_ino,
                ino->i_ino, page->index * blocks);
        for (i = 0 ; i < blocks; i++) {
                struct datablock *b = lafs_get_block(ino, i, page, GFP_KERNEL,
                                                     MKREF(writepage));
                if (!b)
                        continue;
                if (i == 0)
                        b0 = getdref(b, MKREF(writepage0));

                /* We need to check PinPending, otherwise we might be called
                 * to flush out a page that is currently part of a transaction.
                 * Need to be careful with inodes too.
                 */
                if (test_bit(B_Dirty, &b->b.flags)) {
                        struct inode *myi = NULL;

                        if (test_bit(B_PinPending, &b->b.flags))
                                redirty = 1;
                        else if ((myi = rcu_my_inode(b)) != NULL &&
                                 LAFSI(myi)->iblock)
                                redirty = 1;
                        else {
                                rcu_iput(myi);
                                myi = NULL;
                                lafs_iolock_written(&b->b);
                                /* block might have been invalidated,
                                 * or Pinned, while we waited */
                                if (!test_bit(B_Dirty, &b->b.flags))
                                        lafs_iounlock_block(&b->b);
                                else if (test_bit(B_PinPending, &b->b.flags) ||
                                         ((myi = rcu_my_inode(b)) != NULL &&
                                          LAFSI(myi)->iblock)) {
                                        redirty = 1;
                                        lafs_iounlock_block(&b->b);
                                } else {
                                        rcu_iput(myi);
                                        myi = NULL;
                                        lafs_cluster_allocate(&b->b, 0);
                                }
                        }
                        rcu_iput(myi);
                }
                putdref(b, MKREF(writepage));
        }
        if (redirty)
                redirty_page_for_writepage(wbc, page);
        else if (b0) {
                set_page_writeback(page);
                set_bit(B_HaveWriteback, &b0->b.flags);
                lafs_iocheck_writeback(b0, 0);
        }
        unlock_page(page);

        putdref(b0, MKREF(writepage0));
        if (!b0 || redirty)
                return 0;

        if (LAFSI(ino)->depth == 0) {
                /* We really want the data to be safe soon, not just
                 * the page to be clean.  And the data is in the inode.
                 * So write the inode.
                 */
                struct datablock *b = lafs_inode_dblock(ino, SYNC,
                                                        MKREF(writepageflush));
                if (test_bit(B_Dirty, &b->b.flags)) {
                        lafs_iolock_written(&b->b);
                        /* block might have been invalidated while we waited */
                        if (test_bit(B_Dirty, &b->b.flags))
                                lafs_cluster_allocate(&b->b, 0);
                        else
                                lafs_iounlock_block(&b->b);
                }
                putdref(b, MKREF(writepageflush));
        }
        return 0;
}

static void lafs_sync_page(struct page *page)
{
        /* If any block is dirty, flush the cluster so that
         * writeback completes.
         */
        struct inode *ino;
        struct address_space *mapping;
        struct fs *fs;
        int bits;
        int i;
        int want_flush = 0;

        if (!PageWriteback(page))
                /* Presumably the page is locked - nothing we can do.
                 * FIXME: should unplug the queue if the page is not
                 * up to date and has blocks.
                 */
                return;

        mapping = page->mapping;
        if (!mapping)
                return;
        ino = mapping->host;
        fs = fs_from_inode(ino);
        bits = PAGE_SHIFT - ino->i_blkbits;

        spin_lock(&mapping->private_lock);
        if (PagePrivate(page)) {
                struct datablock *bl = (struct datablock *)page->private;

                for (i = 0; i < (1<<bits); i++) {
                        /* If this block is still dirty though the page is in
                         * writeback, the block must be in the current cluster
                         */
                        if (test_bit(B_Dirty, &bl[i].b.flags)) {
                                want_flush = 2;
                                break;
                        }
                        if (test_bit(B_Writeback, &bl[i].b.flags))
                                want_flush = 1;
                }
        }
        spin_unlock(&mapping->private_lock);
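        /* A still-dirty block is in the current cluster, so a full
         * flush is needed; a block merely under writeback appears to
         * need only the second-stage flush to complete.
         */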
        if (want_flush == 2)
                set_bit(FlushNeeded, &fs->fsstate);
        else if (want_flush == 1)
                set_bit(SecondFlushNeeded, &fs->fsstate);
        lafs_wake_thread(fs);
}

static int
lafs_sync_file(struct file *file, int datasync)
{
        struct inode *ino = file->f_dentry->d_inode;
        int err = lafs_sync_inode(ino, 0);

        /* FIXME I ignore datasync, just like file_fsync */
        err = write_inode_now(ino, 0) ?: err;

        if (err == 0)
                lafs_sync_inode(ino, 1);
        return err;
}

void lafs_fillattr(struct inode *ino, struct kstat *stat)
{
        /* This makes sure the reported 'atime' is a time that
         * we can store and return after a clean restart.
         */
        generic_fillattr(ino, stat);

        if (!test_bit(I_AccessTime, &LAFSI(ino)->iflags))
                return;

        stat->atime = LAFSI(ino)->md.file.i_accesstime;
        lafs_add_atime_offset(&stat->atime, LAFSI(ino)->md.file.atime_offset);
}

int lafs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 struct kstat *stat)
{
        lafs_fillattr(dentry->d_inode, stat);
        return 0;
}

const struct file_operations lafs_file_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .write          = do_sync_write,
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
/*      .ioctl          = lafs__ioctl,*/
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
/*      .release        = lafs__release_file,*/
        .fsync          = lafs_sync_file,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
};

const struct inode_operations lafs_file_ino_operations = {
        .setattr        = lafs_setattr,
        .getattr        = lafs_getattr,
};

const struct address_space_operations lafs_file_aops = {
        .readpage       = lafs_readpage,
        .readpages      = lafs_readpages,
        .writepage      = lafs_writepage,
        .write_begin    = lafs_write_begin,
        .write_end      = lafs_write_end,
        .invalidatepage = lafs_invalidate_page,
        .releasepage    = lafs_release_page,
        .sync_page      = lafs_sync_page,
};