1
2 /*
3  * fs/lafs/inode.c
4  * Copyright (C) 2005-2009
5  * Neil Brown <neilb@suse.de>
6  * Released under the GPL, version 2
7  *
8  * generic inode handling
9  *
10  */
11
12 #include        "lafs.h"
13 #include <linux/random.h>
14 #include <linux/delay.h>
15 #include <linux/slab.h>
16
17 static void check_atime_ref(struct inode *ino, int async);
18
19 /* Supporting an async 'iget' - as required by the cleaner -
20  * is slightly non-trivial.
21  * iget*_locked will normally wait for any inode with one
22  * of the flags I_FREEING I_CLEAR I_WILL_FREE I_NEW
23  * to either be unhashed or have the flag cleared.
24  * We cannot afford that wait in the cleaner as we could deadlock.
25  * So we use iget5_locked and provide a test function that fails
26  * if it finds the inode with any of those flags set.
27  * If it does see such an inode, it clears the inum
28  * that was passed in (by reference) so that subsequent tests
29  * know to keep failing (for consistency) and so that the 'set'
30  * function we provide knows to fail the 'set'.
31  * The result of this is that if iget finds an inode it would
32  * have to wait on, the inum is cleared and NULL is returned.
33  * An unfortunate side effect is that an inode will be allocated
34  * and then destroyed to no avail.
35  * This is avoided by calling ilookup5 first.  This also allows
36  * us to only allocate/load the data block if there really seems
37  * to be a need.
38  */
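/* Editorial sketch (not part of the original code): a cleaner-style caller
 * is assumed to treat an async lookup as retryable, roughly:
 *
 *	struct inode *ino = lafs_iget(sb, inum, 1);
 *	if (IS_ERR(ino)) {
 *		if (PTR_ERR(ino) == -EAGAIN)
 *			return;	(B_Async has been set on the block, so the
 *				 caller can be woken to retry later)
 *		return;		(hard failure, e.g. -ENOMEM or -ENOENT)
 *	}
 *	... use ino, then iput(ino) ...
 *
 * A synchronous caller passes async == 0 and never sees -EAGAIN.
 */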
39 #define NO_INO (~(ino_t)0)
40 static int async_itest(struct inode *inode, void *data)
41 {
42         ino_t *inump = data;
43         ino_t inum = *inump;
44
45         if (inum == NO_INO)
46                 /* found and is freeing */
47                 return 0;
48         if (inode->i_ino != inum)
49                 return 0;
50         if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
51                 *inump = NO_INO;
52                 return 0;
53         }
54         return 1;
55 }
56
57 static int async_iset(struct inode *inode, void *data)
58 {
59         ino_t *inump = data;
60         if (!*inump)
61                 return -EBUSY;
62         inode->i_ino = *inump;
63         return 0;
64 }
65
66 struct inode *
67 lafs_iget(struct super_block *sb, ino_t inum, int async)
68 {
69         /* find, and load if needed, this inum */
70         struct inode *ino = NULL;
71         struct inode *oldino;
72         struct datablock *b = NULL;
73         struct inode *inodefile;
74         struct sb_key *k;
75         int err = 0;
76
77         BUG_ON(inum == NO_INO);
78
79         k = sb->s_fs_info;
80         inodefile = k->root;
81
82         if (async) {
83                 /* We cannot afford to block on 'freeing_inode'
84                  * So use iget5_locked and refuse to match such
85                  * inodes.
86                  * If the inode is 'freeing', inum gets set to NO_INO.
87                  * ilookup5 is used first to avoid an unnecessary
88                  * alloc/free if the inode is locked in some way.
89                  */
90                 while (!ino) {
91                         ino_t inum2 = inum;
92                         err = 0;
93                         ino = ilookup5(sb, inum, async_itest, &inum2);
94                         if (ino)
95                                 break;
96
97                         if (inum2 == NO_INO)
98                                 err = -EAGAIN;
99
100                         /* For async we will always want the dblock loaded,
101                          * and we need to load it first as we cannot afford
102                          * to fail -EAGAIN once we have an I_NEW inode.
103                          */
104                         if (!b)
105                                 b = lafs_get_block(inodefile, inum, NULL,
106                                                    GFP_NOFS, MKREF(iget));
107                         if (!b)
108                                 return ERR_PTR(-ENOMEM);
109
110                         if (!err)
111                                 err = lafs_read_block_async(b);
112
113                         if (!err) {
114                                 /* Have the block, so safe to iget */
115                                 inum2 = inum;
116                                 ino = iget5_locked(sb, inum,
117                                                    async_itest, async_iset,
118                                                    &inum2);
119                                 if (!ino) {
120                                         if (inum2 == NO_INO)
121                                                 err = -EAGAIN;
122                                         else
123                                                 err = -ENOMEM;
124                                 }
125                         }
126                         if (err) {
127                                 if (test_and_set_bit(B_Async, &b->b.flags)) {
128                                         putdref(b, MKREF(iget));
129                                         return ERR_PTR(err);
130                                 }
131                                 getdref(b, MKREF(async));
132                         }
133                 }
134         } else
135                 ino = iget_locked(sb, inum);
136
137         if (!ino) {
138                 putdref(b, MKREF(iget));
139                 return ERR_PTR(-ENOMEM);
140         }
141
142         if (!(ino->i_state & I_NEW)) {
143                 putdref(b, MKREF(iget));
144                 if (ino->i_mode) {
145                         check_atime_ref(ino, async);
146                         return ino;
147                 }
148                 iput(ino);
149                 return ERR_PTR(-ENOENT);
150         }
151
152         LAFSI(ino)->filesys = igrab(inodefile);
153
154         /* surprisingly the inode bdi does not default to the
155          * super_block's bdi...
156          */
157         ino->i_data.backing_dev_info = sb->s_bdi;
158         /* Need to load block 'inum' from an inode file...
159          */
160         if (!b) {
161                 b = lafs_get_block(inodefile, inum, NULL, GFP_KERNEL, MKREF(iget));
162                 if (!b)
163                         err = -ENOMEM;
164                 else
165                         err = lafs_read_block(b);
166         }
167         if (err)
168                 goto err;
169
170         oldino = rcu_my_inode(b);
171         if (oldino) {
172                 /* The inode is new, but the block thinks it has an
173                  * old inode, so we must be in the process of destroying
174                  * the old one.
175                  * So fail the lookup without even looking at the content
176                  * of the block (which might not be clear yet).
177                  */
178                 spin_lock(&oldino->i_data.private_lock);
179                 if (!test_bit(I_Deleting, &LAFSI(oldino)->iflags)) {
180                         b->my_inode = NULL;
181                         LAFSI(oldino)->dblock = NULL;
182                         LAFSI(oldino)->iblock = NULL;
183                 }
184                 spin_unlock(&oldino->i_data.private_lock);
185         }
186         rcu_iput(oldino);
187         if (b->my_inode) {
188                 err = -ENOENT;
189                 goto err;
190         }
191
192         err = lafs_import_inode(ino, b);
193         if (err) {
194                 if (err != -ENOENT)
195                         printk("lafs_import_inode failed %d\n", err);
196                 goto err;
197         }
198         check_atime_ref(ino, async);
199         unlock_new_inode(ino);
200 out:
201         if (b && test_and_clear_bit(B_Async, &b->b.flags)) {
202                 putdref(b, MKREF(async));
203                 lafs_wake_thread(fs_from_sb(sb));
204         }
205         putdref(b, MKREF(iget));
206         return ino;
207 err:
208         ino->i_nlink = 0;
209         unlock_new_inode(ino);
210         iput(ino);
211         ino = ERR_PTR(err);
212         goto out;
213 }
214
215 struct inode *
216 lafs_iget_fs(struct fs *fs, int fsnum, int inum, int async)
217 {
218         struct super_block *sb;
219         struct inode *rv;
220
221         sb = fs->prime_sb;
222
223         if (fsnum) {
224                 /* Need to locate or load the superblock for this
225                  * subordinate filesystem
226                  */
227                 struct inode *filesys;
228                 struct super_block *sb2;
229
230                 filesys = lafs_iget(sb, fsnum, async);
231                 if (IS_ERR(filesys))
232                         return filesys;
233                 if (LAFSI(filesys)->type != TypeInodeFile) {
234                         iput(filesys);
235                         return ERR_PTR(-ENOENT);
236                 }
237                 /* FIXME can get_subset_sb be async at all?? */
238                 sb2 = lafs_get_subset_sb(filesys);
239                 if (IS_ERR(sb2)) {
240                         iput(filesys);
241                         return ERR_PTR(PTR_ERR(sb2));
242                 }
243                 rv = lafs_iget(sb2, inum, async);
244                 if (IS_ERR(rv))
245                         deactivate_locked_super(sb2);
246                 else
247                         up_write(&sb2->s_umount);
248         } else {
249                 rv = lafs_iget(sb, inum, async);
250                 if (!IS_ERR(rv))
251                         atomic_inc(&sb->s_active);
252         }
253         return rv;
254 }
255
256 int __must_check
257 lafs_import_inode(struct inode *ino, struct datablock *b)
258 {
259         struct la_inode *lai = map_dblock(b);
260         struct lafs_inode *li = LAFSI(ino);
261         int err = -ENOENT;
262
263         if (lai->filetype == 0) {
264                 li->type = 0;
265                 ino->i_mode = 0;
266                 ino->i_nlink = 0;
267                 goto out;
268         }
269
270         ino->i_mode = S_IFREG;
271         ino->i_nlink = 1; /* For special files, set nlink so they
272                            * never appear unlinked */
273
274         err = -EINVAL;
275
276         LAFS_BUG(ino->i_ino != b->b.fileaddr, &b->b);
277         li->cblocks = le32_to_cpu(lai->data_blocks);
278         li->pblocks = li->ablocks = 0;
279         li->vfs_inode.i_blocks = ((blkcnt_t)li->cblocks
280                                   << (ino->i_sb->s_blocksize_bits - 9));
281         li->ciblocks = le32_to_cpu(lai->index_blocks);
282         li->piblocks = 0;
283         li->iflags = 0;
284
285         ino->i_generation = le16_to_cpu(lai->generation);
286         li->trunc_gen = lai->trunc_gen;
287         li->flags = lai->flags;
288         li->type = lai->filetype;
289         li->metadata_size = le16_to_cpu(lai->metadata_size);
290         li->depth = lai->depth;
291
292         dprintk("inode %lu type is %d\n", (unsigned long)ino->i_ino, li->type);
293
294         ino->i_data.a_ops = &lafs_file_aops;
295         li->trunc_next = 0;
296
297         switch (li->type) {
298         case TypeInodeFile:
299         {
300                 struct fs_md *i = &li->md.fs;
301                 struct fs_metadata *l = &lai->metadata[0].fs;
302                 int nlen;
303
304                 i->usagetable = le16_to_cpu(l->snapshot_usage_table);
305                 decode_time(&ino->i_mtime, le64_to_cpu(l->update_time));
306                 i->cblocks_used = le64_to_cpu(l->blocks_used);
307                 i->pblocks_used = i->ablocks_used = 0;
308                 i->blocks_allowed = le64_to_cpu(l->blocks_allowed);
309                 i->blocks_unalloc = 0;
310                 i->creation_age = le64_to_cpu(l->creation_age);
311                 i->inodes_used = le32_to_cpu(l->inodes_used);
312                 i->quota_inums[0] = le32_to_cpu(l->quota_inodes[0]);
313                 i->quota_inums[1] = le32_to_cpu(l->quota_inodes[1]);
314                 i->quota_inums[2] = le32_to_cpu(l->quota_inodes[2]);
315                 i->quota_inodes[0] = i->quota_inodes[1]
316                         = i->quota_inodes[2] = NULL;
317                 nlen = li->metadata_size - offsetof(struct la_inode,
318                                                     metadata[0].fs.name);
319                 i->accesstime = NULL;
320                 if (i->name)
321                         kfree(i->name);
322                 if (nlen == 0)
323                         i->name = NULL;
324                 else {
325                         /* Need to unmap the dblock before calling
326                          * kmalloc, because the mapping makes us 'atomic'.
327                          */
328                         unmap_dblock(b, lai);
329                         i->name = kmalloc(nlen+1, GFP_KERNEL);
330                         lai = map_dblock(b);
331                         l = &lai->metadata[0].fs;
332
333                         err = -ENOMEM;
334                         if (!i->name)
335                                 goto out;
336                         memcpy(i->name, l->name, nlen);
337                         i->name[nlen] = 0;
338                 }
339                 /* Make this look like a directory */
340                 ino->i_mode = S_IFDIR;
341                 ino->i_uid = 0;
342                 ino->i_gid = 0;
343                 ino->i_size = 0;
344                 ino->i_op = &lafs_subset_ino_operations;
345                 ino->i_fop = &lafs_subset_file_operations;
346                 break;
347         }
348
349         case TypeInodeMap:
350         {
351                 struct inodemap_md *m = &li->md.inodemap;
352                 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
353                 m->size = le32_to_cpu(s->size);
354                 m->thisblock = NoBlock;
355                 m->nextbit = 0;
356                 break;
357         }
358
359         case TypeSegmentMap:
360         {
361                 struct su_md *m = &li->md.segmentusage;
362                 struct su_metadata *s = &lai->metadata[0].segmentusage;
363                 m->table_size = le32_to_cpu(s->table_size);
364                 break;
365         }
366
367         case TypeQuota:
368         {
369                 struct quota_md *m = &li->md.quota;
370                 struct quota_metadata *s = &lai->metadata[0].quota;
371                 m->gracetime = le32_to_cpu(s->gracetime);
372                 m->graceunits = le32_to_cpu(s->graceunits);
373                 break;
374         }
375         case TypeOrphanList:
376         {
377                 struct orphan_md *m = &li->md.orphan;
378                 /* This will be set via lafs_count_orphans */
379                 m->nextfree = 0;
380                 m->reserved = 0;
381                 break;
382         }
383         case TypeAccessTime:
384                 break;
385
386         default: /* TypeBase or larger */
387         {
388                 struct file_md *i = &li->md.file;
389                 struct file_metadata *l = &lai->metadata[0].file;
390                 struct dir_metadata *d = &lai->metadata[0].dir;
391                 struct special_metadata *s = &lai->metadata[0].special;
392
393                 if (li->type < TypeBase)
394                         goto out;
395                 i->flags = le16_to_cpu(l->flags);
396                 ino->i_mode = le16_to_cpu(l->mode);
397                 ino->i_uid = le32_to_cpu(l->userid);
398                 ino->i_gid = le32_to_cpu(l->groupid);
399                 i->treeid = le32_to_cpu(l->treeid);
400                 i->creationtime = le64_to_cpu(l->creationtime);
401                 decode_time(&ino->i_mtime, le64_to_cpu(l->modifytime));
402                 decode_time(&ino->i_ctime, le64_to_cpu(l->ctime));
403                 decode_time(&i->i_accesstime, le64_to_cpu(l->accesstime));
404                 ino->i_atime = i->i_accesstime;
405                 i->atime_offset = 0; /* Will be filled-in later probably */
406                 lafs_add_atime_offset(&ino->i_atime, i->atime_offset);
407                 ino->i_size = le64_to_cpu(l->size);
408                 i->parent = le32_to_cpu(l->parent);
409                 ino->i_nlink = le32_to_cpu(l->linkcount);
410                 if (ino->i_nlink == 0 && list_empty(&b->orphans) &&
411                     fs_from_inode(ino)->rolled) {
412                         /* This block should already be on the orphan
413                          * list, otherwise there is a filesystem
414                          * inconsistency.
415                          * Either the orphan file is wrong, or the
416                          * linkcount is wrong.
417                          * It is safest to assume the latter - either
418                          * way an FS check would be needed to fix it.
419                          * Note: while roll-forward is happening, this
420                          * situation is perfectly possible and is handled
421                          * correctly.
422                          */
423                         /* FIXME set a superblock flag requesting
424                          * directory linkage checking
425                          */
426                         ino->i_nlink = 1;
427                 }
428
429                 dprintk("  mode = 0%o uid %d size %lld\n",
430                         ino->i_mode, ino->i_uid, ino->i_size);
431                 switch (li->type) {
432                 case TypeFile:
433                         ino->i_op = &lafs_file_ino_operations;
434                         ino->i_fop = &lafs_file_file_operations;
435                         ino->i_mode = (ino->i_mode & 07777)  | S_IFREG;
436                         break;
437                 case TypeDir:
438                         i->seed = le32_to_cpu(d->hash_seed);
439                         ino->i_op = &lafs_dir_ino_operations;
440                         ino->i_fop = &lafs_dir_file_operations;
441                         ino->i_mode = (ino->i_mode & 07777)  | S_IFDIR;
442                         {
443                                 u32 *b = (u32 *)lai;
444                                 dprintk("Hmm. %d %d %d\n",
445                                         (int)b[24],
446                                         (int)b[25],
447                                         (int)b[26]);
448                         }
449                         break;
450                 case TypeSymlink:
451                         ino->i_op = &lafs_link_ino_operations;
452                         ino->i_mode = (ino->i_mode & 07777)  | S_IFLNK;
453                         break;
454                 case TypeSpecial:
455                         /* the data had better be in the inode ... */
456                         ino->i_rdev = MKDEV(le32_to_cpu(s->major),
457                                             le32_to_cpu(s->minor));
458                         ino->i_op = &lafs_special_ino_operations;
459                         init_special_inode(ino, ino->i_mode, ino->i_rdev);
460                         break;
461                 }
462                 break;
463         }
464         }
465
466         ino->i_blkbits = ino->i_sb->s_blocksize_bits;
467         /* FIXME i_blocks and i_byte - used for quota?? */
468         err = 0;
469
470         /* Note: no refcount is held yet.  Whichever of the two is freed
471          * first will remove its reference to the other.
472          */
473         li->dblock = b;
474         rcu_assign_pointer(b->my_inode, ino);
475
476 out:
477         if (err && li->type)
478                 printk("inode %lu type is %d\n",
479                        (unsigned long)ino->i_ino, li->type);
480         unmap_dblock(b, lai);
481         return err;
482 }
483
484 static void check_atime_ref(struct inode *ino, int async)
485 {
486         /* If there is an access-time file in this filesystem, the inode
487          * should hold a reference to the relevant block in
488          * that file.
489          */
490         struct inode *root, *at;
491         u32 bnum;
492         struct datablock *b;
493         if (async)
494                 /* Never bother for async lookups */
495                 return;
496         if (LAFSI(ino)->type < TypeBase)
497                 return;
498         if (test_bit(I_AccessTime, &LAFSI(ino)->iflags))
499                 return;
500         root = LAFSI(ino)->filesys;
501         at = LAFSI(root)->md.fs.accesstime;
502         if (at == NULL)
503                 return;
504
505         if (LAFSI(ino)->md.file.atime_offset)
506                 LAFSI(ino)->md.file.atime_offset = 0;
507
508         /* "* 2" to get byte number, then shift to get block
509          * number
510          */
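        /* Worked example (editorial, hypothetical numbers): with 4096-byte
         * blocks (i_blkbits == 12) each block holds 2048 two-byte entries,
         * so inode 10000 maps to block 10000 >> 11 == 4, at entry
         * 10000 & 2047 == 1808 within that block.
         */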
511         bnum = ino->i_ino >> (at->i_blkbits-1);
512         b = lafs_get_block(at, bnum, NULL, GFP_NOFS, MKREF(atime));
513         if (b) {
514                 if (lafs_read_block(b) == 0) {
515                         u16 *atp;
516                         int i;
517                         atp = map_dblock(b);
518                         i = ino->i_ino & ((1 << (at->i_blkbits-1)) - 1);
519                         LAFSI(ino)->md.file.atime_offset = le16_to_cpu(atp[i]);
520                         set_bit(I_AccessTime, &LAFSI(ino)->iflags);
521                         unmap_dblock(b, atp);
522                         lafs_add_atime_offset(&ino->i_atime,
523                                               LAFSI(ino)->md.file.atime_offset);
524                 } else
525                         putdref(b, MKREF(atime));
526         }
527 }
528
529 void lafs_add_atime_offset(struct timespec *atime, int offset)
530 {
531         int expon;
532         time_t mantissa;
533         if (offset == 0)
534                 return;
535
536         expon = offset & 0x1f;
537         if (expon)
538                 mantissa = (offset >> 5) | 0x800;
539         else
540                 mantissa = (offset >> 5);
541         if (expon >= 11) {
542                 /* seconds */
543                 mantissa <<= expon-11;
544                 atime->tv_sec += mantissa;
545         } else {
546                 /* milliseconds */
547                 if (expon)
548                         mantissa <<= expon-1;
549                 timespec_add_ns(atime, (s64)mantissa * 1000000);
550         }
551 }
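/* Editorial note on the 16-bit atime offset, derived from the code above and
 * from update_atime_delta() below: bits 0-4 hold a shift and bits 5-15 an
 * 11-bit mantissa, with an implied twelfth bit (0x800) whenever the shift is
 * non-zero.  A shift of 11 or more means seconds scaled by 1 << (shift - 11);
 * smaller shifts mean milliseconds scaled by 1 << (shift - 1) (no scaling
 * when the shift is zero).
 * Worked example: a 5000 second delta normalises to mantissa 2500 with
 * shift 1, and is stored as ((2500 & 0x7ff) << 5) | (1 + 11); decoding gives
 * (452 | 0x800) << (12 - 11) == 5000 seconds again.
 */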
552
553 static int normalise(int *mantissa)
554 {
555         /* Shift down until value can be stored in 12 bits:
556          * Top bit will be '1', so only 11 bits needed.
557          * Not used on values below 2048.
558          */
559         int shift = 0;
560         while (*mantissa >= 4096) {
561                 *mantissa >>= 1;
562                 shift ++;
563         }
564         return shift;
565 }
566
567 static int update_atime_delta(struct inode *ino)
568 {
569         /* calculate new delta to show the difference between
570          * i_atime and i_accesstime
571          */
572         int rv;
573         if (LAFSI(ino)->type < TypeBase)
574                 return 0;
575         if (timespec_compare(&ino->i_atime,
576                              &LAFSI(ino)->md.file.i_accesstime) <= 0) {
577                 /* We cannot store negative delta so if i_atime is in the
578                  * past, just store zero
579                  */
580                 rv = 0;
581         } else {
582                 struct timespec diff;
583                 int shift;
584
585                 diff = timespec_sub(ino->i_atime,
586                                     LAFSI(ino)->md.file.i_accesstime);
587                 if (diff.tv_sec >= 2048) {
588                         /* Just store the seconds */
589                         rv = diff.tv_sec;
590                         shift = normalise(&rv) + 11;
591                 } else {
592                         /* Store the milliseconds */
593                         rv = diff.tv_nsec / 1000000;
594                         rv += diff.tv_sec * 1000;
595                         if (rv >= 2048)
596                                 shift = normalise(&rv) + 1;
597                         else
598                                 shift = 0;
599                 }
600                 if (shift > 31)
601                         rv = 0xFFFF;
602                 else {
603                         rv &= 0x7ff;
604                         rv <<= 5;
605                         rv |= shift;
606                 }
607         }
608         if (LAFSI(ino)->md.file.atime_offset == rv)
609                 return 0;
610
611         LAFSI(ino)->md.file.atime_offset = rv;
612         return 1;
613 }
614
615 static void store_atime_delta(struct inode *ino)
616 {
617         struct inode *at;
618         u32 bnum;
619         struct datablock *b;
620         u16 *atp;
621         int i;
622
623         if (!test_bit(I_AccessTime, &LAFSI(ino)->iflags))
624                 /* sorry, nothing we can do here */
625                 return;
626
627         /* We own a reference, so this lookup must succeed */
628         at = LAFSI(LAFSI(ino)->filesys)->md.fs.accesstime;
629         bnum = ino->i_ino >> (at->i_blkbits-1);
630         b = lafs_get_block(at, bnum, NULL, GFP_NOFS, MKREF(store_atime));
631         BUG_ON(!b);
632         atp = map_dblock(b);
633         i = ino->i_ino & ((1 << (at->i_blkbits-1)) - 1); /* u16 entry index */
634         if (le16_to_cpu(atp[i]) != LAFSI(ino)->md.file.atime_offset) {
635                 atp[i] = cpu_to_le16(LAFSI(ino)->md.file.atime_offset);
636                 lafs_dirty_dblock(b);
637         }
638         unmap_dblock(b, atp);
639         putdref(b, MKREF(store_atime));
640 }
641
642 void lafs_inode_checkpin(struct inode *ino)
643 {
644         /* Make sure I_Pinned is set correctly.
645          * It should be set precisely when i_nlink is non-zero
646          * and ->iblock is B_Pinned.
647          * When it is set, we own a reference to the inode.
648          *
649          * This needs to be called whenever we change
650          * i_nlink, and whenever we pin or unpin an InoIdx
651          * block.
652          */
653         if (ino->i_nlink == 0) {
654                 /* I_Pinned should not be set */
655                 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags)) {
656                         if (ino->i_sb->s_type == &lafs_fs_type)
657                                 iput(ino);
658                         else
659                                 lafs_iput_fs(ino);
660                 }
661         } else {
662                 /* Need to check if iblock is Pinned. */
663                 struct indexblock *ib = NULL;
664                 if (LAFSI(ino)->iblock) {
665                         spin_lock(&ino->i_data.private_lock);
666                         ib = LAFSI(ino)->iblock;
667                         if (ib && !test_bit(B_Pinned, &ib->b.flags))
668                                 ib = NULL;
669                         spin_unlock(&ino->i_data.private_lock);
670                 }
671                 if (ib) {
672                         if (!test_and_set_bit(I_Pinned, &LAFSI(ino)->iflags)) {
673                                 if (ino->i_sb->s_type == &lafs_fs_type)
674                                         igrab(ino);
675                                 else
676                                         lafs_igrab_fs(ino);
677                         }
678                 } else {
679                         if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags)) {
680                                 if (ino->i_sb->s_type == &lafs_fs_type)
681                                         iput(ino);
682                                 else
683                                         lafs_iput_fs(ino);
684                         }
685                 }
686         }
687 }
688
689 struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG)
690 {
691         struct datablock *db;
692
693         spin_lock(&ino->i_data.private_lock);
694         db = LAFSI(ino)->dblock;
695         if (db) {
696                 if (db->b.inode == ino)
697                         getdref_locked(db, REF);
698                 else {
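                        /* Editorial note: the dblock normally belongs to the
                         * inode file's mapping, so db->b.inode differs from
                         * ino here and its private_lock is taken with a
                         * nested subclass to keep lockdep happy about holding
                         * two locks of the same class.
                         */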
699                         spin_lock_nested(&db->b.inode->i_data.private_lock, 1);
700                         getdref_locked(db, REF);
701                         spin_unlock(&db->b.inode->i_data.private_lock);
702                 }
703         }
704         spin_unlock(&ino->i_data.private_lock);
705         return db;
706 }
707
708 struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
709 {
710         struct datablock *db;
711         int err;
712
713         db = lafs_inode_get_dblock(ino, REF);
714
715         if (!db)
716                 db = lafs_get_block(LAFSI(ino)->filesys, ino->i_ino, NULL,
717                                     GFP_KERNEL, REF);
718         if (!db)
719                 return ERR_PTR(-ENOMEM);
720
721         LAFSI(ino)->dblock = db;
722         rcu_assign_pointer(db->my_inode, ino);
723         if (async)
724                 err = lafs_read_block_async(db);
725         else
726                 err = lafs_read_block(db);
727         if (err == 0)
728                 return db;
729
730         putdref(db, REF);
731         return ERR_PTR(err);
732 }
733
734 void lafs_inode_init(struct datablock *b, int type, int mode, struct inode *dir)
735 {
736         /* A new block has been allocated in an inode file to hold an
737          * inode.  We get to fill in initial values so that when
738          * 'iget' calls lafs_import_inode, the correct inode is
739          * loaded.
740          */
741         struct fs *fs = fs_from_inode(b->b.inode);
742         struct la_inode *lai = map_dblock(b);
743         int size;
744
745         lai->data_blocks = cpu_to_le32(0);
746         lai->index_blocks = cpu_to_le32(0);
747         get_random_bytes(&lai->generation, sizeof(lai->generation));
748         lai->depth = 1;
749         lai->trunc_gen = 0;
750         lai->filetype = type;
751         lai->flags = 0;
752
753         switch(type) {
754         case TypeInodeFile:
755         {
756                 struct fs_metadata *l = &lai->metadata[0].fs;
757                 size = sizeof(struct fs_metadata);
758                 l->update_time = 0;
759                 l->blocks_used = 0;
760                 l->blocks_allowed = 0;
761                 l->creation_age = fs->wc[0].cluster_seq;
762                 l->inodes_used = 0;
763                 l->quota_inodes[0] = 0;
764                 l->quota_inodes[1] = 0;
765                 l->quota_inodes[2] = 0;
766                 l->snapshot_usage_table = 0;
767                 l->pad = 0;
768                 /* name will be zero length and not used */
769                 break;
770         }
771         case TypeInodeMap:
772         {
773                 struct inodemap_metadata *l = &lai->metadata[0].inodemap;
774                 l->size = 0;
775                 size = sizeof(struct inodemap_metadata);
776                 break;
777         }
778         case TypeSegmentMap:
779                 size = sizeof(struct su_metadata);
780                 break;
781         case TypeQuota:
782                 size = sizeof(struct quota_metadata);
783                 break;
784         case TypeOrphanList:
785                 size = 0;
786                 break;
787         case TypeAccessTime:
788                 size = 0;
789                 break;
790         default:
791         {
792                 struct file_metadata *l = &lai->metadata[0].file;
793                 struct timespec now = CURRENT_TIME;
794
795                 l->flags = cpu_to_le16(0);
796                 l->userid = cpu_to_le32(current->cred->fsuid);
797                 if (dir && (dir->i_mode & S_ISGID)) {
798                         l->groupid = cpu_to_le32(dir->i_gid);
799                         if (type == TypeDir)
800                                 mode |= S_ISGID;
801                 } else
802                         l->groupid = cpu_to_le32(current->cred->fsgid);
803                 if (dir && LAFSI(dir)->md.file.treeid)
804                         l->treeid = cpu_to_le32(LAFSI(dir)->md.file.treeid);
805                 else
806                         l->treeid = l->userid;
807
808                 l->mode = cpu_to_le16(mode);
809                 l->creationtime = encode_time(&now);
810                 l->modifytime = l->creationtime;
811                 l->ctime = l->creationtime;
812                 l->accesstime = l->creationtime;
813                 l->size = 0;
814                 l->parent = dir ? cpu_to_le32(dir->i_ino) : 0;
815                 l->linkcount = 0;
816                 l->attrinode = 0;
817                 if (type == TypeDir) {
818                         struct dir_metadata *l = &lai->metadata[0].dir;
819                         u32 seed;
820                         get_random_bytes(&seed,
821                                          sizeof(seed));
822                         seed = (seed & ~7) | 1;
823                         l->hash_seed = cpu_to_le32(seed);
824                         size = sizeof(struct dir_metadata);
825                 } else if (type == TypeSpecial) {
826                         struct special_metadata *s = &lai->metadata[0].special;
827                         s->major = s->minor = 0;
828                         size = sizeof(struct special_metadata);
829                 } else
830                         size = sizeof(struct file_metadata);
831         }
832         }
833         size += sizeof(struct la_inode);
834         lai->metadata_size = cpu_to_le16(size);
835         memset(((char *)lai)+size, 0, fs->blocksize-size);
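        /* Editorial note: the first 16 bits of the freshly-cleared index
         * area hold the index layout tag - IBLK_EXTENT here; compare
         * lafs_inode_fillblock(), which writes IBLK_INDIRECT the same way.
         */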
836         *(u16 *)(((char *)lai)+size) = cpu_to_le16(IBLK_EXTENT);
837
838         unmap_dblock(b, lai);
839         set_bit(B_Valid, &b->b.flags);
840         LAFS_BUG(!test_bit(B_Pinned, &b->b.flags), &b->b);
841         lafs_dirty_dblock(b);
842 }
843
844 void lafs_clear_inode(struct inode *ino)
845 {
846         struct lafs_inode *li = LAFSI(ino);
847         dprintk("CLEAR INODE %d\n", (int)ino->i_ino);
848
849         li->type = 0;
850
851         /* Now is a good time to break the linkage between
852          * inode and dblock - but not if the file is
853          * being deleted
854          */
855         if (!test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
856                 struct datablock *db;
857                 spin_lock(&ino->i_data.private_lock);
858                 db = LAFSI(ino)->dblock;
859                 if (db) {
860                         struct indexblock *ib = LAFSI(ino)->iblock;
861                         LAFS_BUG(ib && atomic_read(&ib->b.refcnt), &db->b);
862                         db->my_inode = NULL;
863                         LAFSI(ino)->dblock = NULL;
864                         LAFSI(ino)->iblock = NULL;
865                 }
866                 spin_unlock(&ino->i_data.private_lock);
867         }
868
869         /* FIXME release quota inodes if filesystem */
870 }
871
872 static int inode_map_free(struct fs *fs, struct super_block *sb, u32 inum);
873
874 void lafs_delete_inode(struct inode *ino)
875 {
876         struct fs *fs = fs_from_inode(ino);
877         struct datablock *b;
878
879         if (ino->i_mode == 0) {
880                 /* There never was an inode here,
881                  * so nothing to do.
882                  */
883                 clear_inode(ino);
884                 return;
885         }
886         dprintk("DELETE INODE %d\n", (int)ino->i_ino);
887
888         /* Normal truncation holds an igrab, so we cannot be
889          * deleted until any truncation finishes
890          */
891         BUG_ON(test_bit(I_Trunc, &LAFSI(ino)->iflags));
892
893         b = lafs_inode_dblock(ino, SYNC, MKREF(delete_inode));
894
895         i_size_write(ino, 0);
896         truncate_inode_pages(&ino->i_data, 0);
897         LAFSI(ino)->trunc_next = 0;
898         set_bit(I_Deleting, &LAFSI(ino)->iflags);
899         set_bit(I_Trunc, &LAFSI(ino)->iflags);
900         lafs_igrab_fs(ino);
901
902         if (!IS_ERR(b)) {
903                 set_bit(B_Claimed, &b->b.flags);
904                 lafs_add_orphan(fs, b);
905                 dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr);
906                 putdref(b, MKREF(delete_inode));
907         }
908         inode_map_free(fs, ino->i_sb,  ino->i_ino);
909
910         clear_inode(ino);
911 }
912
913 static int prune(void *data, u32 addr, u64 paddr, int len)
914 {
915         /* This whole index block is being pruned, just account
916          * for everything and it will be cleared afterwards
917          */
918         struct indexblock *ib = data;
919         struct inode *ino = ib->b.inode;
920         struct fs *fs = fs_from_inode(ino);
921         int ph = !!test_bit(B_Phase1, &ib->b.flags);
922         int i;
923         dprintk("PRUNE %d for %d at %lld\n", addr, len, (long long)paddr);
924         if (paddr == 0 || len == 0)
925                 return 0;
926         for (i = 0 ; i < len ; i++)
927                 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
928         return len;
929 }
930
931 static int prune_some(void *data, u32 addr, u64 paddr, int len)
932 {
933         /* Part of this index block is being pruned.  Copy
934          * what addresses we can into uninc_table so that
935          * it can be 'incorporated'.
936          * We should probably share some code with
937          * lafs_allocated_block??
938          */
939         struct indexblock *ib = data;
940         struct inode *ino = ib->b.inode;
941         struct fs *fs = fs_from_inode(ino);
942         int ph = !!test_bit(B_Phase1, &ib->b.flags);
943         int i;
944
945         if (paddr == 0 || len == 0)
946                 return 0;
947         dprintk("PRUNE2 %d for %d at %lld\n", addr, len, (long long)paddr);
948         for (i = 0 ; i < len ; i++) {
949                 /* FIXME should allow longer truncation ranges in uninc_table
950                  * as they are easy to handle.
951                  */
952                 struct addr *a;
953                 if (addr + i < LAFSI(ino)->trunc_next)
954                         continue;
955                 spin_lock(&ino->i_data.private_lock);
956                 a = &ib->uninc_table.pending_addr
957                         [ib->uninc_table.pending_cnt - 1];
958                 if (ib->uninc_table.pending_cnt <
959                     ARRAY_SIZE(ib->uninc_table.pending_addr)) {
960                         a++;
961                         a->fileaddr = addr + i;
962                         a->physaddr = 0;
963                         a->cnt = 1;
964                         LAFS_BUG(!test_bit(B_Pinned, &ib->b.flags), &ib->b);
965                         ib->uninc_table.pending_cnt++;
966                 } else {
967                         spin_unlock(&ino->i_data.private_lock);
968                         break;
969                 }
970                 spin_unlock(&ino->i_data.private_lock);
971                 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
972         }
973         return i;
974 }
975
976 int lafs_inode_handle_orphan(struct datablock *b)
977 {
978         /* Don't need rcu protection for my_inode: run_orphan
979          * holds a reference.
980          */
981         struct indexblock *ib, *ib2;
982         struct inode *ino = b->my_inode;
983         struct fs *fs = fs_from_inode(ino);
984         u32 trunc_next, next_trunc;
985         int loop_cnt = 20;
986         int err = -ENOMEM;
987
988         if (!test_bit(I_Trunc, &LAFSI(ino)->iflags)) {
989                 if (test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
990                         LAFS_BUG(ino->i_nlink, &b->b);
991                         if (LAFSI(ino)->cblocks +
992                             LAFSI(ino)->pblocks +
993                             LAFSI(ino)->ablocks +
994                             LAFSI(ino)->ciblocks +
995                             LAFSI(ino)->piblocks)
996                         printk("Deleting inode %lu: %ld+%ld+%ld %ld+%ld\n",
997                                ino->i_ino,
998                                LAFSI(ino)->cblocks,
999                                LAFSI(ino)->pblocks,
1000                                LAFSI(ino)->ablocks,
1001                                LAFSI(ino)->ciblocks,
1002                                LAFSI(ino)->piblocks);
1003                         BUG_ON(LAFSI(ino)->cblocks +
1004                                LAFSI(ino)->pblocks +
1005                                LAFSI(ino)->ablocks +
1006                                LAFSI(ino)->ciblocks +
1007                                LAFSI(ino)->piblocks);
1008                         if (lafs_erase_dblock_async(b))
1009                                 lafs_orphan_release(fs, b);
1010                 } else if (ino->i_nlink || LAFSI(ino)->type == 0)
1011                         lafs_orphan_release(fs, b);
1012                 else
1013                         lafs_orphan_forget(fs, b);
1014                 return 0;
1015         }
1016
1017         ib = lafs_make_iblock(ino, ADOPT, SYNC, MKREF(inode_handle_orphan));
1018         if (IS_ERR(ib))
1019                 return PTR_ERR(ib);
1020
1021         /* Here is the guts of 'truncate'.  We find the next leaf index
1022          * block and discard all the addresses therein.
1023          */
1024         trunc_next = LAFSI(ino)->trunc_next;
1025
1026         if (trunc_next == 0xFFFFFFFF) {
1027                 /* truncate has finished in that all data blocks
1028                  * have been removed and all index blocks are either
1029                  * gone or pending incorporation, at which point they
1030                  * will go.
1031                  * If we hit a phase change, we will need to postpone
1032                  * the rest of the cleaning until it completes.
1033                  * If a checkpoint is happening, it will do for us
1034                  * all the work that we could do now.  So just
1035                  * let it.
1036                  */
1037                 struct indexblock *tmp;
1038                 struct indexblock *next;
1039                 u32 lastaddr;
1040
1041                 if (!test_bit(B_Pinned, &ib->b.flags)) {
1042                         /* must be finished */
1043                         LAFS_BUG(test_bit(B_Dirty, &ib->b.flags), &ib->b);
1044                         clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1045                         lafs_iput_fs(ino);
1046                         wake_up(&fs->trunc_wait);
1047                         err = -ERESTARTSYS;
1048                         goto out2;
1049                 }
1050                 if (fs->checkpointing) {
1051                         /* This cannot happen with current code,
1052                          * but leave it in case we ever have
1053                          * orphan handling parallel with checkpoints
1054                          */
1055                         err = -EBUSY; /* Try again after the checkpoint */
1056                         goto out2;
1057                 }
1058
1059                 lastaddr = (i_size_read(ino) +
1060                             fs->blocksize - 1)
1061                         >> fs->blocksize_bits;
1062                 /* Find a Pinned descendant of ib which has no
1063                  * Pinned descendants and no PrimaryRef dependent
1064                  * (so take the last).
1065                  * Prefer blocks that are beyond EOF (again, take the last).
1066                  * If there are none, descend the last block that
1067                  * is not after EOF and look at its children.
1068                  */
1069                 ib2 = next = ib;
1070                 spin_lock(&ib->b.inode->i_data.private_lock);
1071                 while (next) {
1072                         ib2 = next;
1073                         next = NULL;
1074                         list_for_each_entry(tmp, &ib2->children, b.siblings) {
1075                                 if (!test_bit(B_Index, &tmp->b.flags) ||
1076                                     !test_bit(B_Pinned, &tmp->b.flags))
1077                                         continue;
1078                                 if (next == NULL ||
1079                                     tmp->b.fileaddr > next->b.fileaddr)
1080                                         next = tmp;
1081                         }
1082                 }
1083                 if (ib2->b.fileaddr < lastaddr) {
1084                         /* Must be all done */
1085                         spin_unlock(&ib->b.inode->i_data.private_lock);
1086                         clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1087                         lafs_iput_fs(ino);
1088                         wake_up(&fs->trunc_wait);
1089                         err = -ERESTARTSYS;
1090                         goto out2;
1091                 }
1092                 getiref(ib2, MKREF(inode_handle_orphan2));
1093                 spin_unlock(&ib->b.inode->i_data.private_lock);
1094
1095                 /* ib2 is an index block beyond EOF with no
1096                  * Pinned children.
1097                  * Incorporating it should unpin it.
1098                  */
1099                 if (!list_empty(&ib2->children)) {
1100                         lafs_print_tree(&ib2->b, 3);
1101                         LAFS_BUG(1, &ib2->b);
1102                 }
1103
1104                 if (!lafs_iolock_written_async(&ib2->b)) {
1105                         putiref(ib2, MKREF(inode_handle_orphan2));
1106                         err = -EAGAIN;
1107                         goto out2;
1108                 }
1109                 while (ib2->uninc_table.pending_cnt || ib2->uninc)
1110                         lafs_incorporate(fs, ib2);
1111
1112                 if (test_bit(B_Dirty, &ib2->b.flags) ||
1113                     test_bit(B_Realloc, &ib2->b.flags))
1114                         lafs_cluster_allocate(&ib2->b, 0);
1115                 else
1116                         lafs_iounlock_block(&ib2->b);
1117
1118                 if (!list_empty(&ib2->b.siblings)) {
1119                         printk("looping on %s\n", strblk(&ib2->b));
1120                         loop_cnt--;
1121                         if (loop_cnt < 0)
1122                                 BUG();
1123                 }
1124                 putiref(ib2, MKREF(inode_handle_orphan2));
1125                 err = -ERESTARTSYS;
1126                 if (ib->uninc) {
1127                         if (lafs_iolock_written_async(&ib->b)) {
1128                                 while (ib->uninc)
1129                                         lafs_incorporate(fs, ib);
1130                                 lafs_iounlock_block(&ib->b);
1131                         } else
1132                                 err = -EAGAIN;
1133                 }
1134         out2:
1135                 putiref(ib, MKREF(inode_handle_orphan));
1136                 return err;
1137         }
1138
1139         putiref(ib, MKREF(inode_handle_orphan));
1140
1141         ib = lafs_leaf_find(ino, trunc_next, ADOPT, &next_trunc,
1142                             ASYNC, MKREF(inode_handle_orphan3));
1143         if (IS_ERR(ib))
1144                 return PTR_ERR(ib);
1145         /* now hold an iolock on ib */
1146
1147         /* Ok, trunc_next seems to refer to a block that exists.
1148          * We need to erase it.
1149          *
1150          * So we open up the index block ourselves, call
1151          * lafs_summary_update with each block address, and then
1152          * erase the block.
1153          */
1154
1155         if (LAFSI(ino)->depth == 0) {
1156                 /* Nothing to truncate */
1157                 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1158                 lafs_iput_fs(ino);
1159                 if (test_bit(B_Pinned, &ib->b.flags))
1160                         /* Need to move the dirtiness which keeps this
1161                          * pinned to the data block.
1162                          */
1163                         lafs_cluster_allocate(&ib->b, 0);
1164                 else
1165                         lafs_iounlock_block(&ib->b);
1166                 err = -ERESTARTSYS;
1167                 goto out_put;
1168         }
1169
1170         lafs_checkpoint_lock(fs);
1171         err = lafs_reserve_block(&ib->b, ReleaseSpace);
1172         if (err < 0)
1173                 goto out;
1174
1175         if (!test_bit(B_Valid, &ib->b.flags) &&
1176             test_bit(B_InoIdx, &ib->b.flags)) {
1177                 /* still invalid, just re-erase to remove
1178                  * pinning */
1179                 LAFSI(ino)->trunc_next = next_trunc;
1180                 lafs_cluster_allocate(&ib->b, 0);
1181                 err = -ERESTARTSYS;
1182                 goto out_unlocked;
1183         }
1184
1185         lafs_pin_block(&ib->b);
1186
1187         /* It might be that this can happen, in which case
1188          * we simply update trunc_next and loop.  But I'd like
1189          * to be sure before I implement that
1190          */
1191         if (!test_bit(B_Valid, &ib->b.flags)) {
1192                 printk("Not Valid: %s\n", strblk(&ib->b));
1193                 printk("depth = %d\n", LAFSI(ino)->depth);
1194                 if (test_bit(B_InoIdx, &ib->b.flags))
1195                         printk("DB: %s\n", strblk(&LAFSI(ib->b.inode)->dblock->b));
1196                 LAFSI(ino)->trunc_next = next_trunc;
1197                 //BUG_ON(!test_bit(B_Valid, &ib->b.flags));
1198                 err = -ERESTARTSYS;
1199                 goto out;
1200         }
1201
1202         if (ib->b.fileaddr < trunc_next &&
1203             lafs_leaf_next(ib, 0) < trunc_next) {
1204                 /* We only want to truncate part of this index block.
1205                  * So we copy addresses into uninc_table and then
1206                  * call lafs_incorporate.
1207                  * This might cause the index tree to grow, so we
1208                  * cannot trust next_trunc
1209                  */
1210                 if (ib->uninc_table.pending_cnt == 0 &&
1211                     ib->uninc == NULL) {
1212                         lafs_dirty_iblock(ib, 0);
1213                         /* FIXME this just removes 8 blocks at a time,
1214                          * which is not enough
1215                          */
1216                         lafs_walk_leaf_index(ib, prune_some, ib);
1217                 }
1218                 if (test_bit(B_Dirty, &ib->b.flags))
1219                         lafs_incorporate(fs, ib);
1220                 err = -ERESTARTSYS;
1221                 goto out;
1222         }
1223         LAFSI(ino)->trunc_next = next_trunc;
1224
1225         while (ib->uninc_table.pending_cnt || ib->uninc) {
1226                 /* There should be no Realloc data blocks here
1227                  * but index blocks might be realloc still.
1228                  */
1229                 LAFS_BUG(!test_bit(B_Dirty, &ib->b.flags) &&
1230                          !test_bit(B_Realloc, &ib->b.flags), &ib->b);
1231                 lafs_incorporate(fs, ib);
1232         }
1233         if (test_bit(B_InoIdx, &ib->b.flags) ||
1234             !test_bit(B_PhysValid, &ib->b.flags) ||
1235             ib->b.physaddr != 0) {
1236                 lafs_walk_leaf_index(ib, prune, ib);
1237                 lafs_clear_index(ib);
1238                 lafs_dirty_iblock(ib, 0);
1239         }
1240         if (test_bit(B_Dirty, &ib->b.flags))
1241                 lafs_incorporate(fs, ib);
1242         if (!list_empty(&ib->children))
1243                 lafs_print_tree(&ib->b, 2);
1244         LAFS_BUG(!list_empty(&ib->children), &ib->b);
1245         err = -ERESTARTSYS;
1246 out:
1247         lafs_iounlock_block(&ib->b);
1248 out_unlocked:
1249         lafs_checkpoint_unlock(fs);
1250 out_put:
1251         putiref(ib, MKREF(inode_handle_orphan3));
1252         return err;
1253 }
1254
1255 void lafs_dirty_inode(struct inode *ino)
1256 {
1257         /* this is called in one of three cases:
1258          * 1/ by lafs internally when dblock or iblock is pinned and
1259          *    ready to be dirtied
1260          * 2/ by writeout before requesting a write - to update mtime
1261          * 3/ by read to update atime
1262          *
1263          * We want to handle atime updates carefully as they may not change
1264          * the stored inode itself.
1265          * For all other updates, the inode dblock exists and is pinned.
1266          * In those cases we will be updating the inode and so can store
1267          * the atime exactly.
1268          * For an atime update, the dblock may not exist, or may not be
1269          * Pinned.  If it isn't, then we don't want to make the inode dirty
1270          * but only want to update the delta stored in the atime file.
1271          * The block for that should already be pinned.
1272          *
1273          *
1274          * We mustn't update the data block as it could be in
1275          * writeout and we cannot always wait safely.
1276          * So require that anyone who really cares dirties the datablock
1277          * or a child themselves.
1278          * When cluster_allocate eventually gets called, it will update
1279          * the datablock from the inode.
1280          * If an update has to wait for the next phase, lock_dblock
1281          * (e.g. in setattr) will do that.
1282          *
1283          * We also use this opportunity to update the filesystem modify time.
1284          */
1285         struct timespec now;
1286         struct inode *filesys;
1287         int atime_only = 1;
1288
1289         if (LAFSI(ino)->dblock) {
1290                 struct datablock *db;
1291                 spin_lock(&ino->i_data.private_lock);
1292                 db = LAFSI(ino)->dblock;
1293                 if (db && test_bit(B_Pinned, &db->b.flags))
1294                         atime_only = 0;
1295                 spin_unlock(&ino->i_data.private_lock);
1296         }
1297
1298         if (atime_only) {
1299                 if (update_atime_delta(ino))
1300                         store_atime_delta(ino);
1301                 return;
1302         }
1303
1304         set_bit(I_Dirty, &LAFSI(ino)->iflags);
1305         ino->i_sb->s_dirt = 1;
1306
1307         if (LAFSI(ino)->type < TypeBase)
1308                 return;
1309         LAFSI(ino)->md.file.i_accesstime = ino->i_atime;
1310         if (LAFSI(ino)->md.file.atime_offset) {
1311                 LAFSI(ino)->md.file.atime_offset = 0;
1312                 store_atime_delta(ino);
1313         }
1314
1315         now = current_fs_time(ino->i_sb);
1316         filesys = LAFSI(ino)->filesys;
1317         if (!timespec_equal(&filesys->i_mtime, &now)) {
1318                 filesys->i_mtime = now;
1319                 set_bit(I_Dirty, &LAFSI(filesys)->iflags);
1320         }
1321 }
1322
1323 int lafs_sync_inode(struct inode *ino, int wait)
1324 {
1325         /* fsync has been called on this file so we need
1326          * to sync any inode updates to the next cluster.
1327          *
1328          * If we cannot create an update record,
1329          * we wait for a phase change, which writes everything
1330          * out.
1331          */
1332         struct datablock *b;
1333         struct fs *fs = fs_from_inode(ino);
1334         struct update_handle uh;
1335         int err;
1336
1337         if (wait) {
1338                 if (LAFSI(ino)->update_cluster > 1)
1339                         lafs_cluster_wait(fs, LAFSI(ino)->update_cluster);
1340                 if (LAFSI(ino)->update_cluster == 1) {
1341                         lafs_checkpoint_lock(fs);
1342                         lafs_checkpoint_unlock_wait(fs);
1343                 }
1344                 return 0;
1345         }
1346
1347         LAFSI(ino)->update_cluster = 0;
1348         if (!test_bit(I_Dirty, &LAFSI(ino)->iflags))
1349                 return 0;
1350         b = lafs_inode_dblock(ino, SYNC, MKREF(write_inode));
1351         if (IS_ERR(b))
1352                 return PTR_ERR(b);
1353
1354         lafs_iolock_written(&b->b);
1355         lafs_inode_fillblock(ino);
1356         lafs_iounlock_block(&b->b);
1357
1358         err = lafs_cluster_update_prepare(&uh, fs, LAFS_INODE_LOG_SIZE);
1359         if (err)
1360                 lafs_cluster_update_abort(&uh);
1361         else {
1362                 lafs_checkpoint_lock(fs);
1363                 if (lafs_cluster_update_pin(&uh) == 0) {
1364                         if (test_and_clear_bit(B_Dirty, &b->b.flags))
1365                                 lafs_space_return(fs, 1);
1366                         LAFSI(ino)->update_cluster =
1367                                 lafs_cluster_update_commit
1368                                 (&uh, b, LAFS_INODE_LOG_START,
1369                                  LAFS_INODE_LOG_SIZE);
1370                 } else
1371                         lafs_cluster_update_abort(&uh);
1372                 lafs_checkpoint_unlock(fs);
1373         }
1374         if (test_bit(B_Dirty, &b->b.flags)) {
1375                 /* FIXME need to write out the data block...
1376                  * Is that just lafs_cluster_allocate ?
1377                  */
1378         }
1379
1380         if (LAFSI(ino)->update_cluster == 0) {
1381                 lafs_checkpoint_lock(fs);
1382                 if (test_bit(B_Dirty, &b->b.flags))
1383                         LAFSI(ino)->update_cluster = 1;
1384                 lafs_checkpoint_start(fs);
1385                 lafs_checkpoint_unlock(fs);
1386         }
1387         putdref(b, MKREF(write_inode));
1388         return 0; /* FIXME should I return some error message??? */
1389 }
1390
1391 void lafs_inode_fillblock(struct inode *ino)
1392 {
1393         /* copy data from ino into the related data block */
1394
1395         struct lafs_inode *li = LAFSI(ino);
1396         struct datablock *db = li->dblock;
1397         struct la_inode *lai;
1398
1399         clear_bit(I_Dirty, &LAFSI(ino)->iflags);
1400
1401         lai = map_dblock(db);
1402         lai->data_blocks = cpu_to_le32(li->cblocks);
1403         lai->index_blocks = cpu_to_le32(li->ciblocks);
1404         lai->generation = cpu_to_le16(ino->i_generation);
1405         lai->trunc_gen = li->trunc_gen;
1406         lai->flags = li->flags;
1407         lai->filetype = li->type;
1408         if (lai->metadata_size != cpu_to_le16(li->metadata_size)) {
1409                 /* Changing metadata size is weird.
1410                  * We will need to handle this somehow for xattrs.
1411                  * For now we just want to cope with
1412                  * Dir -> InodeFile changes, and that guarantees us
1413                  * there is no index info - so just clear the index
1414                  * area.
1415                  */
1416                 u16 *s = (u16 *)(((char *)lai) + li->metadata_size);
1417                 BUG_ON(li->type != TypeInodeFile);
1418                 lai->metadata_size = cpu_to_le16(li->metadata_size);
1419                 memset(s, 0, ino->i_sb->s_blocksize - li->metadata_size);
1420                 *s = cpu_to_le16(IBLK_INDIRECT);
1421         }
1422         lai->depth = li->depth;
1423
1424         switch (li->type) {
1425         case TypeInodeFile:
1426         {
1427                 struct fs_md *i = &li->md.fs;
1428                 struct fs_metadata *l = &lai->metadata[0].fs;
1429                 int nlen;
1430
1431                 l->snapshot_usage_table = cpu_to_le16(i->usagetable);
1432                 l->update_time = cpu_to_le64(encode_time(&ino->i_mtime));
1433                 l->blocks_used = cpu_to_le64(i->cblocks_used);
1434                 l->blocks_allowed = cpu_to_le64(i->blocks_allowed);
1435                 l->creation_age = cpu_to_le64(i->creation_age);
1436                 l->inodes_used = cpu_to_le32(i->inodes_used);
1437                 l->quota_inodes[0] = cpu_to_le32(i->quota_inums[0]);
1438                 l->quota_inodes[1] = cpu_to_le32(i->quota_inums[1]);
1439                 l->quota_inodes[2] = cpu_to_le32(i->quota_inums[2]);
1440                 nlen = lai->metadata_size - offsetof(struct la_inode,
1441                                                      metadata[0].fs.name);
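                /* The fileset name is stored NUL-padded in whatever space
                 * is left in the metadata area, and is silently truncated
                 * if it is longer than that space.
                 */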
1442                 memset(l->name, 0, nlen);
1443                 if (i->name == NULL)
1444                         nlen = 0;
1445                 else if (strlen(i->name) < nlen)
1446                         nlen = strlen(i->name);
1447                 memcpy(l->name, i->name, nlen);
1448                 break;
1449         }
1450
1451         case TypeInodeMap:
1452         {
1453                 struct inodemap_md *m = &li->md.inodemap;
1454                 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
1455                 s->size = cpu_to_le32(m->size);
1456                 break;
1457         }
1458
1459         case TypeSegmentMap:
1460         {
1461                 struct su_md *m = &li->md.segmentusage;
1462                 struct su_metadata *s = &lai->metadata[0].segmentusage;
1463                 s->table_size = cpu_to_le32(m->table_size);
1464                 break;
1465         }
1466
1467         case TypeQuota:
1468         {
1469                 struct quota_md *m = &li->md.quota;
1470                 struct quota_metadata *s = &lai->metadata[0].quota;
1471                 s->gracetime = cpu_to_le32(m->gracetime);
1472                 s->graceunits = cpu_to_le32(m->graceunits);
1473                 break;
1474         }
1475         case TypeOrphanList:
1476         case TypeAccessTime:
1477                 break;
1478
1479         default: /* TypeBase or larger */
1480         {
1481                 struct file_md *i = &li->md.file;
1482                 struct file_metadata *l = &lai->metadata[0].file;
1483                 struct dir_metadata *d = &lai->metadata[0].dir;
1484                 struct special_metadata *s = &lai->metadata[0].special;
1485
1486                 if (li->type < TypeBase)
1487                         break;
1488                 l->flags = cpu_to_le16(i->flags);
1489                 l->mode = cpu_to_le16(ino->i_mode);
1490                 l->userid = cpu_to_le32(ino->i_uid);
1491                 l->groupid = cpu_to_le32(ino->i_gid);
1492                 l->treeid = cpu_to_le32(i->treeid);
1493                 l->creationtime = cpu_to_le64(i->creationtime);
1494                 l->modifytime = cpu_to_le64(encode_time(&ino->i_mtime));
1495                 l->ctime = cpu_to_le64(encode_time(&ino->i_ctime));
1496                 l->accesstime = cpu_to_le64(encode_time(&i->i_accesstime));
1497                 l->size = cpu_to_le64(ino->i_size);
1498                 l->parent = cpu_to_le32(i->parent);
1499                 l->linkcount = cpu_to_le32(ino->i_nlink);
1500
1501                 switch (li->type) {
1502                 case TypeFile:
1503                         break;
1504                 case TypeDir:
1505                         d->hash_seed = cpu_to_le32(i->seed);
1506                         break;
1507                 case TypeSymlink:
1508                         break;
1509                 case TypeSpecial:
1510                         s->major = cpu_to_le32(MAJOR(ino->i_rdev));
1511                         s->minor = cpu_to_le32(MINOR(ino->i_rdev));
1512                         break;
1513                 }
1514         }
1515         }
1516         unmap_dblock(db, lai);
1517 }
1518
1519 /*-----------------------------------------------------------------------
1520  * Inode allocate map handling.
1521  * Inode 1 of each fileset is a bitmap of free inode numbers.
1522  * Whenever the file is extended in size, new bits are set to one.  They
1523  * are then cleared when the inode is allocated.  When a block becomes
1524  * full of zeros, we don't need to store it any more.
1525  *
1526  * We don't clear the bit until we are committed to creating an inode.
1527  * This means we cannot clear it straight away, so two different threads
1528  * might see the same inode number as being available.  We have two
1529  * approaches to guard against this.
1530  * Firstly we have a 'current' pointer into the inodemap file and
1531  * increase that past the inode we return.  This discourages multiple
1532  * hits but as the pointer would need to be rewound occasionally it
1533  * isn't a guarantee.  The guarantee against multiple allocations is done
1534  * via a flag in the block representing an inode.  This is set
1535  * while an inode is being allocated.
1536  */
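/* A worked example of the mapping used below, assuming (say) a 1KiB
 * block size: each inode-map block then covers 1024*8 = 8192 inode
 * numbers, so inum 10000 lives in map block 1 (10000 >> 13) at bit
 * 1808 (10000 & 8191), and conversely inum = bit + (bnum << (i_blkbits + 3)).
 */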
1537
1538 /* inode number allocation has the prealloc/pin/commit/abort structure
1539  * so it can be committed effectively
1540  */
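/* In outline, a caller goes through (this is what lafs_new_inode below
 * does, slightly simplified):
 *
 *      err = inode_map_new_prepare(fs, inum, sb, &imni);
 *  retry:
 *      lafs_checkpoint_lock(fs);
 *      err = inode_map_new_pin(&imni);
 *      if (err == -EAGAIN) {
 *              lafs_checkpoint_unlock_wait(fs);
 *              goto retry;
 *      }
 *      ...then either inode_map_new_commit(&imni) on success or
 *      inode_map_new_abort(&imni) on failure, plus lafs_checkpoint_unlock(fs).
 */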
1541
1542 static int
1543 choose_free_inum(struct fs *fs, struct super_block *sb, u32 *inump,
1544                  struct datablock **bp, int *restarted)
1545 {
1546         struct inode *im = lafs_iget(sb, 1, SYNC);
1547         loff_t bnum;
1548         struct datablock *b;
1549         char *buf;
1550         int err;
1551         int bit;
1552
1553         if (*bp) {
1554                 struct inode *i = (*bp)->b.inode;
1555                 putdref(*bp, MKREF(cfi_map));
1556                 iput(i);
1557                 *bp = NULL;
1558         }
1559
1560         mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
1561 retry:
1562         bnum = LAFSI(im)->md.inodemap.thisblock;
1563
1564         if (bnum == NoBlock ||
1565             LAFSI(im)->md.inodemap.nextbit >= (fs->blocksize<<3)) {
1566                 if (bnum == NoBlock)
1567                         bnum = LAFSI(im)->md.inodemap.size;
1568
1569                 if (bnum+1 < LAFSI(im)->md.inodemap.size)
1570                         bnum++;
1571                 else if (!*restarted) {
1572                         bnum = 0;
1573                         *restarted = 1;
1574                 } else {
1575                         /* Need to add a new block to the file */
1576                         bnum = LAFSI(im)->md.inodemap.size;
1577                         b = lafs_get_block(im, bnum, NULL, GFP_KERNEL,
1578                                            MKREF(cfi_map));
1579                         err = -ENOMEM;
1580                         if (!b)
1581                                 goto abort;
1582                         lafs_iolock_written(&b->b);
1583                         set_bit(B_PinPending, &b->b.flags);
1584                         lafs_iounlock_block(&b->b);
1585                 retry2:
1586                         lafs_checkpoint_lock(fs);
1587                         err = lafs_pin_dblock(b, NewSpace);
1588                         if (err == -EAGAIN) {
1589                                 lafs_checkpoint_unlock_wait(fs);
1590                                 goto retry2;
1591                         }
1592                         if (err < 0)
1593                                 goto abort_unlock;
1594
1595                         buf = map_dblock(b);
1596                         /* Set block to "all are free" */
1597                         memset(buf, 0xff, fs->blocksize);
1598                         unmap_dblock(b, buf);
1599                         set_bit(B_Valid, &b->b.flags);
1600                         LAFSI(im)->md.inodemap.size = bnum+1;
1601                         lafs_dirty_inode(im);
1602                         lafs_dirty_dblock(b);
1603                         lafs_checkpoint_unlock(fs);
1604                         putdref(b, MKREF(cfi_map));
1605                 }
1606                 b = NULL;
1607                 err = lafs_find_next(im, &bnum);
1608                 if (err < 0)
1609                         goto abort;
1610                 if (err == 0)
1611                         bnum = 0;
1612
1613                 LAFSI(im)->md.inodemap.nextbit = 0;
1614                 LAFSI(im)->md.inodemap.thisblock = bnum;
1615                 goto retry;
1616         }
1617         b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(cfi_map));
1618         err = -ENOSPC;
1619         if (!b)
1620                 goto abort;
1621         err = lafs_find_block(b, NOADOPT);
1622         if (err)
1623                 goto abort;
1624         if (b->b.physaddr == 0 && !test_bit(B_Valid, &b->b.flags)) {
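                /* An unallocated block here is a hole: the block was
                 * dropped because every bit was zero, so no inum in this
                 * range is free - move on to the next block.
                 */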
1625                 LAFSI(im)->md.inodemap.nextbit =
1626                         (fs->blocksize<<3) + 1;
1627                 putdref(b, MKREF(cfi_map));
1628                 goto retry;
1629         }
1630         err = lafs_read_block(b);
1631         if (err)
1632                 goto abort;
1633
1634         bit = LAFSI(im)->md.inodemap.nextbit;
1635         LAFSI(im)->md.inodemap.thisblock = bnum;
1636         buf = map_dblock(b);
1637         while (bnum == 0 && bit < 16) {
1638                 /* Never return an inum below 16 - they are special */
1639                 if (!generic_test_le_bit(bit, (unsigned long *)buf))
1640                         generic___clear_le_bit(bit, (unsigned long *)buf);
1641                 bit++;
1642         }
1643
1644         bit = generic_find_next_le_bit((unsigned long *)buf,
1645                                        fs->blocksize<<3, bit);
1646         unmap_dblock(b, buf);
1647         LAFSI(im)->md.inodemap.nextbit = bit+1;
1648         if (bit >= fs->blocksize<<3) {
1649                 putdref(b, MKREF(cfi_map));
1650                 goto retry;
1651         }
1652         mutex_unlock(&im->i_mutex);
1653         *bp = b;
1654         *inump = bit + (bnum << (im->i_blkbits + 3));
1655         return 0;
1656
1657 abort_unlock:
1658         lafs_checkpoint_unlock(fs);
1659 abort:
1660         putdref(b, MKREF(cfi_map));
1661         *bp = NULL;
1662         mutex_unlock(&im->i_mutex);
1663         iput(im);
1664         return err;
1665 }
1666
1667 struct inode_map_new_info {
1668         struct datablock *ib, *mb;
1669 };
1670
1671 static int
1672 inode_map_new_prepare(struct fs *fs, int inum, struct super_block *sb,
1673                       struct inode_map_new_info *imni)
1674 {
1675         int choice = inum;
1676         int restarted = 0;
1677         int err = 0;
1678         struct datablock *b;
1679
1680         imni->ib = imni->mb = NULL;
1681 retry:
1682         if (inum == 0)
1683                 /* choose a possibly-free inode number */
1684                 err = choose_free_inum(fs, sb, &choice,
1685                                        &imni->mb, &restarted);
1686         if (err)
1687                 return err;
1688
1689         b = lafs_get_block(ino_from_sb(sb), choice, NULL, GFP_KERNEL,
1690                            MKREF(cfi_ino));
1691         if (!b)
1692                 return -ENOMEM;
1693
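        /* B_Claimed is the flag (see the comment above) that guards
         * against two threads allocating the same inode number.  If it is
         * already set someone else is busy creating this inum, so retry
         * with a new candidate, or fail if a specific inum was requested.
         */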
1694         if (test_and_set_bit(B_Claimed, &b->b.flags)) {
1695                 putdref(b, MKREF(cfi_ino));
1696                 if (inum)
1697                         return -EEXIST;
1698                 goto retry;
1699         }
1700         if (imni->mb) {
1701                 lafs_iolock_written(&imni->mb->b);
1702                 set_bit(B_PinPending, &imni->mb->b.flags);
1703                 lafs_iounlock_block(&imni->mb->b);
1704         }
1705         set_bit(B_PinPending, &b->b.flags);
1706         b->my_inode = NULL;
1707         imni->ib = b;
1708         return 0;
1709 }
1710
1711 static int
1712 inode_map_new_pin(struct inode_map_new_info *imni)
1713 {
1714         int err = 0;
1715         if (imni->mb)
1716                 err = lafs_pin_dblock(imni->mb, NewSpace);
1717         err = err ?: lafs_pin_dblock(imni->ib, NewSpace);
1718         return err;
1719 }
1720
1721 static void
1722 inode_map_new_commit(struct inode_map_new_info *imni)
1723 {
1724         unsigned long *buf;
1725
1726         if (imni->mb) {
1727                 int blksize = imni->ib->b.inode->i_sb->s_blocksize;
1728                 int bit = imni->ib->b.fileaddr & (blksize*8 - 1);
1729                 int hole = 0;
1730                 struct inode *ino = imni->mb->b.inode;
1731
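                /* imni->ib is the new inode's own block in the inode file,
                 * so its file address is the inode number; 'bit' is that
                 * inum's position within this inode-map block, and clearing
                 * it below marks the number as allocated.
                 */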
1732                 mutex_lock_nested(&ino->i_mutex, I_MUTEX_QUOTA);
1733                 buf = map_dblock(imni->mb);
1734                 generic___clear_le_bit(bit, buf);
1735                 if (buf[blksize/sizeof(*buf)-1] == 0 &&
1736                     generic_find_next_le_bit(buf, blksize*8, 0) == blksize*8)
1737                         /* block is empty, punch a hole */
1738                         hole = 1;
1739
1740                 unmap_dblock(imni->mb, buf);
1741                 if (hole)
1742                         lafs_erase_dblock(imni->mb);
1743                 else
1744                         lafs_dirty_dblock(imni->mb);
1745
1746                 putdref(imni->mb, MKREF(cfi_map));
1747                 mutex_unlock(&ino->i_mutex);
1748                 iput(ino);
1749         }
1750         putdref(imni->ib, MKREF(cfi_ino));
1751 }
1752
1753 static void
1754 inode_map_new_abort(struct inode_map_new_info *imni)
1755 {
1756         if (imni->ib) {
1757                 clear_bit(B_Claimed, &imni->ib->b.flags);
1758                 clear_bit(B_PinPending, &imni->ib->b.flags);
1759                 lafs_orphan_release(fs_from_inode(imni->ib->b.inode),
1760                                     imni->ib);
1761         }
1762         putdref(imni->ib, MKREF(cfi_ino));
1763         if (imni->mb) {
1764                 struct inode *ino = imni->mb->b.inode;
1765                 putdref(imni->mb, MKREF(cfi_map));
1766                 iput(ino);
1767         }
1768 }
1769
1770 struct inode *
1771 lafs_new_inode(struct fs *fs, struct super_block *sb, struct inode *dir,
1772                int type, int inum, int mode, struct datablock **inodbp)
1773 {
1774         /* allocate and instantiate a new inode.  If inum is zero,
1775          * choose any free number; otherwise we are creating a special
1776          * inode and have to use the given number.
1777          * This creation is committed independently of any name that might
1778          * subsequently be given to the inode.  So we register it as an
1779          * orphan so that it will be cleaned up if the name isn't
1780          * successfully created.
1781          *
1782          */
1783         struct inode *ino;
1784         struct datablock *b;
1785         struct inode_map_new_info imni;
1786         struct update_handle ui;
1787         int err;
1788
1789         err = inode_map_new_prepare(fs, inum, sb, &imni);
1790         err = lafs_cluster_update_prepare(&ui, fs, sizeof(struct la_inode))
1791                 ?: err;
1792         if (err == 0)
1793                 err = lafs_make_orphan(fs, imni.ib);
1794         if (err)
1795                 goto abort;
1796 retry:
1797         lafs_checkpoint_lock(fs);
1798
1799         err = inode_map_new_pin(&imni);
1800
1801         if (err == -EAGAIN) {
1802                 lafs_checkpoint_unlock_wait(fs);
1803                 goto retry;
1804         }
1805         if (err < 0)
1806                 goto abort_unlock;
1807
1808         b = getdref(imni.ib, MKREF(inode_new));
1809
1810         lafs_iolock_block(&b->b); /* make sure we don't race with the cleaner
1811                                    * and zero this inode while trying to load it
1812                                    */
1813         lafs_inode_init(b, type, mode, dir);
1814         lafs_iounlock_block(&b->b);
1815
1816         inode_map_new_commit(&imni);
1817         ino = lafs_iget(sb, b->b.fileaddr, SYNC);
1818         if (IS_ERR(ino)) {
1819                 lafs_cluster_update_abort(&ui);
1820                 LAFS_BUG(1, &b->b);
1821         } else
1822                 lafs_cluster_update_commit(&ui, b, 0,
1823                                            LAFSI(ino)->metadata_size);
1824         LAFS_BUG(LAFSI(ino)->dblock != b, &b->b);
1825         LAFS_BUG(b->my_inode != ino, &b->b);
1826         lafs_checkpoint_unlock(fs);
1827
1828         if (inodbp)
1829                 *inodbp = b;
1830         else
1831                 putdref(b, MKREF(inode_new));
1832         return ino;
1833
1834 abort_unlock:
1835         lafs_checkpoint_unlock(fs);
1836         err = -ENOSPC;
1837 abort:
1838         inode_map_new_abort(&imni);
1839         lafs_cluster_update_abort(&ui);
1840         dprintk("After abort %d: %s\n", err, strblk(&imni.ib->b));
1841         return ERR_PTR(err);
1842 }
1843
1844 static int inode_map_free(struct fs *fs, struct super_block *sb, u32 inum)
1845 {
1846         struct inode *im = lafs_iget(sb, 1, SYNC);
1847         int bit;
1848         unsigned long *buf;
1849         struct datablock *b;
1850         u32 bnum;
1851         int err;
1852
1853         mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
1854
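        /* Each inode-map block covers blocksize*8 inode numbers:
         * bnum selects the map block, bit the inum within it.
         */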
1855         bnum = inum >> (3 + sb->s_blocksize_bits);
1856         bit = inum - (bnum << (3 + sb->s_blocksize_bits));
1857         b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1858         if (!b) {
1859                 mutex_unlock(&im->i_mutex);
1860                 iput(im);
1861                 return -ENOMEM;
1862         }
1863         err = lafs_read_block(b);
1864         if (err) {
1865                 putdref(b, MKREF(inode_map_free));
1866                 mutex_unlock(&im->i_mutex);
1867                 iput(im);
1868                 return err;
1869         }
1870         lafs_iolock_written(&b->b);
1871         set_bit(B_PinPending, &b->b.flags);
1872         lafs_iounlock_block(&b->b);
1873 retry:
1874         lafs_checkpoint_lock(fs);
1875         err = lafs_pin_dblock(b, ReleaseSpace);
1876         if (err == -EAGAIN) {
1877                 lafs_checkpoint_unlock_wait(fs);
1878                 goto retry;
1879         }
1880         BUG_ON(err < 0);
1881         buf = map_dblock(b);
1882         generic___set_le_bit(bit, buf);
1883         unmap_dblock(b, buf);
1884         lafs_dirty_dblock(b);
1885         putdref(b, MKREF(inode_map_free));
1886         lafs_checkpoint_unlock(fs);
1887         mutex_unlock(&im->i_mutex);
1888         iput(im);
1889         return 0;
1890 }
1891
1892 int lafs_inode_inuse(struct fs *fs, struct super_block *sb, u32 inum)
1893 {
1894         /* This is used during roll-forward to register a newly created
1895          * inode in the inode map
1896          */
1897         struct inode *im = lafs_iget(sb, 1, SYNC);
1898         int bit;
1899         unsigned long *buf;
1900         struct datablock *b;
1901         u32 bnum;
1902         int err;
1903
1904         mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
1905
1906         bnum = inum >> (3 + sb->s_blocksize_bits);
1907         bit = inum - (bnum << (3 + sb->s_blocksize_bits));
1908         if (bnum > LAFSI(im)->md.inodemap.size) {
1909                 /* inum is unbelievably big */
1910                 mutex_unlock(&im->i_mutex);
1911                 iput(im);
1912                 return -EINVAL;
1913         }
1914         b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1915         if (!b) {
1916                 mutex_unlock(&im->i_mutex);
1917                 iput(im);
1918                 return -ENOMEM;
1919         }
1920
1921         err = lafs_read_block(b);
1922         if (err) {
1923                 putdref(b, MKREF(inode_map_free));
1924                 mutex_unlock(&im->i_mutex);
1925                 iput(im);
1926                 return err;
1927         }
1928
1929         lafs_iolock_written(&b->b);
1930         set_bit(B_PinPending, &b->b.flags);
1931         lafs_iounlock_block(&b->b);
1932 retry:
1933         lafs_checkpoint_lock(fs);
1934         err = lafs_pin_dblock(b, CleanSpace);
1935         if (err == -EAGAIN) {
1936                 lafs_checkpoint_unlock_wait(fs);
1937                 goto retry;
1938         }
1939         BUG_ON(err < 0);
1940         buf = map_dblock(b);
1941         if (bnum == LAFSI(im)->md.inodemap.size) {
1942                 /* need to add a new block to the file */
1943                 memset(buf, 0xff, fs->blocksize);
1944                 LAFSI(im)->md.inodemap.size = bnum + 1;
1945                 lafs_dirty_inode(im);
1946         }
1947         generic___clear_le_bit(bit, buf);
1948         unmap_dblock(b, buf);
1949         lafs_dirty_dblock(b);
1950         putdref(b, MKREF(inode_map_free));
1951         lafs_checkpoint_unlock(fs);
1952         mutex_unlock(&im->i_mutex);
1953         iput(im);
1954         return 0;
1955 }
1956
1957
1958
1959 int lafs_setattr(struct dentry *dentry, struct iattr *attr)
1960 {
1961         int err;
1962         struct inode *ino = dentry->d_inode;
1963         struct fs *fs = fs_from_inode(ino);
1964         struct datablock *db;
1965
1966         err = inode_change_ok(ino, attr);
1967         if (err)
1968                 return err;
1969         db = lafs_inode_dblock(ino, SYNC, MKREF(setattr));
1970         if (IS_ERR(db))
1971                 return PTR_ERR(db);
1972
1973         /* We don't need iolock_written here as we don't
1974          * actually change the inode block yet
1975          */
1976         lafs_iolock_block(&db->b);
1977         set_bit(B_PinPending, &db->b.flags);
1978         lafs_iounlock_block(&db->b);
1979
1980         /* FIXME quota stuff */
1981
1982 again:
1983         lafs_checkpoint_lock(fs);
1984         err = lafs_pin_dblock(db, ReleaseSpace);
1985         if (err == -EAGAIN) {
1986                 lafs_checkpoint_unlock_wait(fs);
1987                 goto again;
1988         }
1989         /* inode_setattr calls lafs_dirty_inode, which sets
1990          * I_Dirty so the dblock will get updated.
1991          */
1992         err = err ?: inode_setattr(ino, attr);
1993         if (!err)
1994                 lafs_dirty_dblock(db);
1995         clear_bit(B_PinPending, &db->b.flags);
1996         putdref(db, MKREF(setattr));
1997         lafs_checkpoint_unlock(fs);
1998
1999         return err;
2000 }
2001
2002 void lafs_truncate(struct inode *ino)
2003 {
2004         /* Want to truncate this file.
2005          * i_size has already been changed, and the address space
2006          * has been cleaned up.
2007          * So just start the background truncate
2008          */
2009         struct fs *fs = fs_from_inode(ino);
2010         struct datablock *db = lafs_inode_dblock(ino, SYNC, MKREF(trunc));
2011         loff_t trunc_block;
2012         DEFINE_WAIT(wq);
2013
2014         if (IS_ERR(db))
2015                 return;
2016
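        /* Round i_size up to whole blocks: trunc_block is the first
         * block index lying entirely beyond the new size.
         */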
2017         trunc_block = ((i_size_read(ino) + fs->blocksize - 1)
2018                        >> fs->blocksize_bits);
2019         /* We hold i_mutex, so regular orphan processing cannot
2020          * continue - we have to push it forward ourselves.
2021          */
2022         while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
2023                LAFSI(ino)->trunc_next < trunc_block) {
2024                 prepare_to_wait(&fs->async_complete, &wq,
2025                                 TASK_UNINTERRUPTIBLE);
2026                 lafs_inode_handle_orphan(db);
2027                 if (test_bit(B_Orphan, &db->b.flags))
2028                         schedule();
2029         }
2030         finish_wait(&fs->async_complete, &wq);
2031
2032         /* There is nothing we can do about errors here.  The
2033          * most likely are ENOMEM which itself is very unlikely.
2034          * If this doesn't get registered as an orphan .... maybe
2035          * it will have to wait until something else truncates it.
2036          */
2037         lafs_make_orphan(fs, db);
2038
2039         if (!test_and_set_bit(I_Trunc, &LAFSI(ino)->iflags))
2040                 lafs_igrab_fs(ino);
2041         if (trunc_block == 0)
2042                 LAFSI(ino)->trunc_gen++;
2043         LAFSI(ino)->trunc_next = trunc_block;
2044         putdref(db, MKREF(trunc));
2045 }
2046
2047 const struct inode_operations lafs_special_ino_operations = {
2048         .setattr        = lafs_setattr,
2049         .getattr        = lafs_getattr,
2050         .truncate       = lafs_truncate,
2051 };