
/*
 * fs/lafs/file.c
 * Copyright (C) 2005-2009
 * Neil Brown <neilb@suse.de>
 * Released under the GPL, version 2
 *
 * File operations
 */

#include        "lafs.h"
#include        <linux/bit_spinlock.h>
#include        <linux/writeback.h>
#include        <linux/version.h>

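/* Shared state for readpage/readpages: one bio is built up across
 * calls to readpage_filler, and pages_remaining lets each allocation
 * size the bio to cover all pages still to be read.
 */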
struct readpage_data {
        struct bio *bio;
        int pages_remaining;
};

static int
readpage_filler(void *data, struct page *page)
{
        struct inode *ino = page->mapping->host;
        struct readpage_data *rpd = data;
        struct datablock *b0;
        int n = 1 << (PAGE_SHIFT - ino->i_blkbits);
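        /* e.g. with 4096-byte pages (PAGE_SHIFT == 12) and 1024-byte
         * blocks (i_blkbits == 10), n == 4 blocks per page.
         */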
        int i;
        int err = 0;

        rpd->pages_remaining--;
        b0 = lafs_get_block(ino, 0, page, GFP_KERNEL,
                            MKREF(readpage0));

        if (!b0)
                return -ENOMEM;

        LAFS_BUG(test_bit(B_HaveLock, &b0->b.flags), &b0->b);

        dprintk("read page %p for %d blocks\n", page, n);
        for (i = 0; i < n; i++) {
                struct datablock *b = lafs_get_block(ino, i, page, GFP_KERNEL,
                                                     MKREF(readpage));
                BUG_ON(!b);

                err = lafs_find_block(b, NOADOPT);
                if (err) {
                        putdref(b, MKREF(readpage));
                        break;
                }
                if (test_bit(B_Valid, &b->b.flags)) {
                        putdref(b, MKREF(readpage));
                        continue;
                }
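                /* lafs_load_block adds the block to the current bio;
                 * -EINVAL appears to mean the block cannot join this
                 * bio (full, or not contiguous), so submit what we
                 * have and retry with a fresh one.
                 */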
        retry:
                if (rpd->bio == NULL)
                        rpd->bio = bio_alloc(GFP_NOFS, rpd->pages_remaining + 1);
                err = lafs_load_block(&b->b, rpd->bio);
                if (err == -EINVAL) {
                        submit_bio(READ, rpd->bio);
                        rpd->bio = NULL;
                        goto retry;
                }
                putdref(b, MKREF(readpage));
                if (err)
                        break;
        }
        set_bit(B_HaveLock, &b0->b.flags);
        lafs_iocheck_block(b0, 0);
        putdref(b0, MKREF(readpage0));
        return err;
}

static int
lafs_readpage(struct file *file, struct page *page)
{
        /* The page is locked.  We want to read it all in
         * and then unlock it.
         * For this we get all the blocks and load them.
         */
        int err = 0;
        struct readpage_data rpd;

        rpd.bio = NULL;
        rpd.pages_remaining = 1;
        err = readpage_filler(&rpd, page);

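        /* Submit whatever the filler accumulated in the shared bio;
         * if nothing needed loading the bio is empty and is simply
         * released.
         */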
        if (rpd.bio) {
                if (rpd.bio->bi_size)
                        submit_bio(READ, rpd.bio);
                else
                        bio_put(rpd.bio);
        }
        return err;
}

static int
lafs_readpages(struct file *file, struct address_space *mapping,
               struct list_head *pages, unsigned nr_pages)
{
        int err = 0;
        struct readpage_data rpd;

        rpd.bio = NULL;
        rpd.pages_remaining = nr_pages;
        err = read_cache_pages(mapping, pages, readpage_filler, &rpd);

        if (rpd.bio) {
                if (rpd.bio->bi_size)
                        submit_bio(READ, rpd.bio);
                else
                        bio_put(rpd.bio);
        }
        return err;
}

static int
lafs_write_begin(struct file *file, struct address_space *mapping,
                 loff_t pos, unsigned len, unsigned flags,
                 struct page **pagep, void **fsdata)
{
        /* The range covers one or more blocks in this page
         * and may partially cover the first or last.
         * If there is partial coverage, we need to read in
         * those blocks.
         * Then we 'reserve' those datablocks to ensure that
         * a write will succeed.
         */
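        /* Worked example (hypothetical numbers): with 4K pages, 1K
         * blocks, pos == 1536 and len == 2048, we get from == 1536,
         * to == 3584, first == 1, last == 3.  Blocks 1 and 3 are only
         * partially covered, so both must be read in first, while
         * block 2 is fully overwritten and need not be.
         */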
        struct inode *ino = mapping->host;
        struct fs *fs = fs_from_inode(ino);
        int bits = ino->i_blkbits;
        int first, last, loading = -1;
        struct datablock *fb;
        int err = 0;
        int i;
        pgoff_t index;
        unsigned from, to;
        struct page *page;
        DEFINE_WAIT(wq);

        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;

        first = from >> bits;
        last = (to-1) >> bits;

        while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
               LAFSI(ino)->trunc_next <= last) {
                struct datablock *db = lafs_inode_dblock(ino,
                                                         SYNC, MKREF(writetrunc));
                prepare_to_wait(&fs->async_complete, &wq,
                                TASK_UNINTERRUPTIBLE);
                lafs_inode_handle_orphan(db);
                if (test_bit(B_Orphan, &db->b.flags))
                        schedule();
                putdref(db, MKREF(writetrunc));
        }
        finish_wait(&fs->async_complete, &wq);

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29)
        page = __grab_cache_page(mapping, index);
#else
        page = grab_cache_page_write_begin(mapping, index, flags);
#endif
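        /* (grab_cache_page_write_begin replaced __grab_cache_page in
         * 2.6.29, hence the version check above.)
         */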
        if (!page)
                return -ENOMEM;
        *pagep = page;

        fb = lafs_get_block(ino, first, page, GFP_KERNEL, MKREF(write));
        dprintk("PREPARE %p\n", fb);
        if (!fb) {
                err = -ENOMEM;
                goto fail;
        }
        /* Further lafs_get_block calls cannot fail, as both the page
         * and the block structures now exist.
         */
        for (i = first + 1 ; i <= last ; i++)
                lafs_get_block(ino, i, page, GFP_KERNEL, MKREF(write));

        if (from != (first << bits)) {
                err = lafs_find_block(fb, NOADOPT);
                if (!err)
                        err = lafs_load_block(&fb->b, NULL);
                if (err)
                        goto fail;
                loading = first;
        }
        if (last != loading &&
            to != ((last+1) << bits)) {
                struct datablock *lb = fb + last - first;

                err = lafs_find_block(lb, ADOPT);
                if (!err)
                        err = lafs_load_block(&lb->b, NULL);
                if (err)
                        goto fail;

                err = lafs_wait_block(&lb->b);
                if (err)
                        goto fail;
        }
        if (loading == first)
                err = lafs_wait_block(&fb->b);
        if (err)
                goto fail;
retry:
        lafs_checkpoint_lock(fs);
        for (i = first ; err == 0 && i <= last ; i++) {
                /* FIXME need PinPending or something to make sure
                 * credits don't disappear */
                err = lafs_reserve_block(&fb[i - first].b, NewSpace);
                LAFS_BUG(fb[i-first].b.parent == NULL, &fb[i-first].b);
        }
        if (err == -EAGAIN) {
                lafs_checkpoint_unlock_wait(fs);
                err = 0;
                goto retry;
        }
        if (err < 0)
                goto fail_unlock;
        *fsdata = fb;
        return 0;

fail_unlock:
        lafs_checkpoint_unlock(fs);
fail:
        if (fb)
                for (i = first; i <= last ; i++)
                        putdref(&fb[i-first], MKREF(write));
        unlock_page(page);
        page_cache_release(page);
        return err;
}

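/* Complete a write: update i_size if the file grew, mark the written
 * blocks valid and dirty, and drop the references and checkpoint lock
 * taken in lafs_write_begin.
 */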
static int
lafs_write_end(struct file *file,
               struct address_space *mapping,
               loff_t pos, unsigned len, unsigned copied,
               struct page *page, void *fsdata)
{
        struct inode *ino = mapping->host;
        struct fs *fs = fs_from_inode(ino);
        int bits = ino->i_blkbits;
        unsigned first, last;
        struct datablock *fb;
        unsigned i;
        pgoff_t index;
        unsigned from, to;

        index = pos >> PAGE_CACHE_SHIFT;
        from = pos & (PAGE_CACHE_SIZE - 1);
        to = from + len;

        first = from >> bits;
        last = (to-1) >> bits;

        fb = fsdata;

        if (unlikely(copied < len)) {
                if (!PageUptodate(page))
                        copied = 0;
        }

        /*
         * No need to use i_size_read() here: i_size cannot change
         * under us because we hold i_mutex.
         */
        if (pos + copied > ino->i_size) {
                i_size_write(ino, pos + copied);
                /* Note that we deliberately don't call
                 * mark_inode_dirty(ino);
                 * the inode will automatically be written
                 * when the data blocks have been, and there
                 * is no rush before that.
                 * Just set I_Dirty and allow
                 * __set_page_dirty_nobuffers to do the rest.
                 */
                set_bit(I_Dirty, &LAFSI(ino)->iflags);
        }
        __set_page_dirty_nobuffers(page);
        for (i = first ; i <= last ; i++) {
                if (copied > 0) {
                        set_bit(B_Valid, &(fb+i-first)->b.flags);
                        lafs_dirty_dblock(fb+i-first);
                }
                putdref(fb+i-first, MKREF(write));
        }
        lafs_checkpoint_unlock(fs);
        unlock_page(page);
        page_cache_release(page);
        return copied;
}

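/* Write out any dirty blocks in this page by adding them to the
 * current write cluster.  Blocks that are pinned (still part of an
 * uncommitted transaction) cannot be written yet, so the page is
 * redirtied instead.
 */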
static int
lafs_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *ino = page->mapping->host;
        struct datablock *b0 = NULL;
        int blocks = PAGE_SIZE >> ino->i_blkbits;
        int i;
        int redirty = 0;

        dprintk("WRITEPAGE %lu/%lu/%lu\n", LAFSI(ino)->filesys->i_ino,
                ino->i_ino, page->index * blocks);
        for (i = 0 ; i < blocks; i++) {
                struct datablock *b = lafs_get_block(ino, i, page, GFP_KERNEL,
                                                     MKREF(writepage));
                if (!b)
                        continue;
                if (i == 0)
                        b0 = getdref(b, MKREF(writepage0));

                /* We need to check PinPending, otherwise we might be called
                 * to flush out a page that is currently part of a transaction.
                 * Need to be careful with inodes too.
                 */
                if (test_bit(B_Dirty, &b->b.flags)) {
                        struct inode *myi = NULL;

                        if (test_bit(B_PinPending, &b->b.flags))
                                redirty = 1;
                        else if ((myi = rcu_my_inode(b)) != NULL &&
                                 LAFSI(myi)->iblock)
                                redirty = 1;
                        else {
                                rcu_iput(myi);
                                myi = NULL;
                                lafs_iolock_written(&b->b);
                                /* block might have been invalidated,
                                 * or Pinned, while we waited */
                                if (!test_bit(B_Dirty, &b->b.flags))
                                        lafs_iounlock_block(&b->b);
                                else if (test_bit(B_PinPending, &b->b.flags) ||
                                         ((myi = rcu_my_inode(b)) != NULL &&
                                          LAFSI(myi)->iblock)) {
                                        redirty = 1;
                                        lafs_iounlock_block(&b->b);
                                } else {
                                        rcu_iput(myi);
                                        myi = NULL;
                                        lafs_cluster_allocate(&b->b, 0);
                                }
                        }
                        rcu_iput(myi);
                }
                putdref(b, MKREF(writepage));
        }
        if (redirty)
                redirty_page_for_writepage(wbc, page);
        else if (b0) {
                set_page_writeback(page);
                set_bit(B_HaveWriteback, &b0->b.flags);
                lafs_iocheck_writeback(b0, 0);
        }
        unlock_page(page);

        putdref(b0, MKREF(writepage0));
        if (!b0 || redirty)
                return 0;

        if (LAFSI(ino)->depth == 0) {
                /* We really want the data to be safe soon, not just
                 * the page to be clean.  And the data is in the inode.
                 * So write the inode.
                 */
                struct datablock *b = lafs_inode_dblock(ino, SYNC,
                                                        MKREF(writepageflush));
                if (test_bit(B_Dirty, &b->b.flags)) {
                        lafs_iolock_written(&b->b);
                        /* block might have been invalidated while we waited */
                        if (test_bit(B_Dirty, &b->b.flags))
                                lafs_cluster_allocate(&b->b, 0);
                        else
                                lafs_iounlock_block(&b->b);
                }
                putdref(b, MKREF(writepageflush));
        }
        return 0;
}

static void lafs_sync_page(struct page *page)
{
        /* If any block is dirty, flush the cluster so that
         * writeback completes.
         */
        struct inode *ino;
        struct address_space *mapping;
        struct fs *fs;
        int bits;
        int i;
        int want_flush = 0;

        if (!PageWriteback(page))
                /* Presumably the page is locked - nothing we can do.
                 * FIXME: should unplug the queue if the page is not
                 * up to date and has blocks.
                 */
                return;

        mapping = page->mapping;
        if (!mapping)
                return;
        ino = mapping->host;
        fs = fs_from_inode(ino);
        bits = PAGE_SHIFT - ino->i_blkbits;

        spin_lock(&mapping->private_lock);
        if (PagePrivate(page)) {
                struct datablock *bl = (struct datablock *)page->private;

                for (i = 0; i < (1<<bits); i++) {
                        /* If this block is still dirty though the page is in
                         * writeback, the block must be in the current cluster
                         */
                        if (test_bit(B_Dirty, &bl[i].b.flags)) {
                                want_flush = 2;
                                break;
                        }
                        if (test_bit(B_Writeback, &bl[i].b.flags))
                                want_flush = 1;
                }
        }
        spin_unlock(&mapping->private_lock);
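        /* A still-dirty block is in the current cluster, so a full
         * flush is needed; a block merely under writeback appears to
         * need only the second-stage flush to complete.
         */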
        if (want_flush == 2)
                set_bit(FlushNeeded, &fs->fsstate);
        else if (want_flush == 1)
                set_bit(SecondFlushNeeded, &fs->fsstate);
        lafs_wake_thread(fs);
}

static int
lafs_sync_file(struct file *file, int datasync)
{
        struct inode *ino = file->f_dentry->d_inode;
        int err = lafs_sync_inode(ino, 0);

        /* FIXME I ignore datasync, just like file_fsync */
        err = write_inode_now(ino, 0) ?: err;

        if (err == 0)
                lafs_sync_inode(ino, 1);
        return err;
}

void lafs_fillattr(struct inode *ino, struct kstat *stat)
{
        /* This makes sure the reported 'atime' is a time that
         * we can store and return after a clean restart.
         */
        generic_fillattr(ino, stat);

        if (!test_bit(I_AccessTime, &LAFSI(ino)->iflags))
                return;

        stat->atime = LAFSI(ino)->md.file.i_accesstime;
        lafs_add_atime_offset(&stat->atime, LAFSI(ino)->md.file.atime_offset);
}

int lafs_getattr(struct vfsmount *mnt, struct dentry *dentry,
                 struct kstat *stat)
{
        lafs_fillattr(dentry->d_inode, stat);
        return 0;
}

const struct file_operations lafs_file_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .write          = do_sync_write,
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
/*      .ioctl          = lafs__ioctl,*/
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
/*      .release        = lafs__release_file,*/
        .fsync          = lafs_sync_file,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
};

const struct inode_operations lafs_file_ino_operations = {
        .setattr        = lafs_setattr,
        .getattr        = lafs_getattr,
};

const struct address_space_operations lafs_file_aops = {
        .readpage       = lafs_readpage,
        .readpages      = lafs_readpages,
        .writepage      = lafs_writepage,
        .write_begin    = lafs_write_begin,
        .write_end      = lafs_write_end,
        .invalidatepage = lafs_invalidate_page,
        .releasepage    = lafs_release_page,
        .sync_page      = lafs_sync_page,
};