4 * Copyright (C) 2005-2009
5 * Neil Brown <neilb@suse.de>
6 * Released under the GPL, version 2
8 * generic inode handling
13 #include <linux/random.h>
14 #include <linux/delay.h>
15 #include <linux/slab.h>
17 static void check_atime_ref(struct inode *ino, int async);
19 /* Supporting an async 'iget' - as required by the cleaner -
20 * is slightly non-trivial.
21 * iget*_locked will normally wait for any inode with one
22 * of the flags I_FREEING I_CLEAR I_WILL_FREE I_NEW
23 * to either be unhashed or have the flag cleared.
24 * We cannot afford that wait in the cleaner as we could deadlock.
25 * So we use iget5_locked and provide a test function that fails
26 * if it finds the inode with any of those flags set.
27 * If it does see the inode like that it sets a flag in the 'ikey'
28 * that is passed in by reference so that it knows to continue
29 * failing (for consistency) and so that the 'set' function
30 * we provide can know to fail the 'set'.
31 * The result of this is that if iget finds an inode it would
32 * have to wait on, a flag is set and NULL is returned.
33 * An unfortunate side effect is that an inode will be allocated
34 * and then destroyed to no avail.
35 * This is avoided by calling ilookup5 first. This also allows
36 * us to only allocate/load the data block if there really seems
/* Match callback for ilookup5/iget5_locked in the synchronous case:
 * an inode matches the lookup key iff both its inode number and its
 * containing filesystem match the ikey passed as 'data'.
 * NOTE(review): the return statements are outside this fragment.
 */
45 static int sync_itest(struct inode *inode, void *data)
47 struct ikey *ik = data;
49 if (inode->i_ino != ik->inum ||
50 LAFSI(inode)->filesys != ik->fsys)
/* Match callback for the async (cleaner) lookup path.  Like
 * sync_itest, but additionally refuses to match an inode that is in
 * any of the I_FREEING/I_CLEAR/I_WILL_FREE/I_NEW states, because the
 * cleaner cannot afford to block waiting for such an inode (see the
 * long comment above).  The 'freeing' case is flagged via the ikey.
 */
55 static int async_itest(struct inode *inode, void *data)
57 struct ikey *ik = data;
60 /* found and is freeing */
62 if (!sync_itest(inode, data))
64 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
/* 'set' callback for iget5_locked: initialise a freshly allocated
 * inode with the inode number and filesystem recorded in the ikey.
 */
71 static int iset(struct inode *inode, void *data)
73 struct ikey *ik = data;
76 inode->i_ino = ik->inum;
77 LAFSI(inode)->filesys = ik->fsys;
/* Find (and, if needed, load from its data block) inode 'inum' of
 * filesystem 'fsys'.  When 'async' is set this is being called from
 * the cleaner and must never block waiting for a freeing inode: the
 * datablock is loaded first so we cannot fail -EAGAIN once we hold an
 * I_NEW inode.  Returns the inode, or an ERR_PTR on failure.
 * NOTE(review): several intermediate lines (error checks, labels) are
 * not visible in this fragment.
 */
82 lafs_iget(struct inode *fsys, ino_t inum, int async)
84 /* find, and load if needed, this inum */
85 struct inode *ino = NULL;
87 struct datablock *b = NULL;
88 struct ikey ik = { .inum = inum, .fsys = fsys, };
90 struct super_block *sb = fsys->i_sb;
93 /* We cannot afford to block on 'freeing_inode'
94 * So use iget5_locked and refuse to match such
96 * If the inode is 'freeing', inum gets set to NO_INO.
97 * ilookup5 is used first to avoid an unnecessary
98 * alloc/free if the inode is locked in some way.
103 ino = ilookup5(sb, inum, async_itest, &ik);
110 /* For async we will always want the dblock loaded,
111 * and we need to load it first as we cannot afford
112 * to fail -EAGAIN once we have an I_NEW inode.
115 b = lafs_get_block(fsys, inum, NULL,
116 GFP_NOFS, MKREF(iget));
118 return ERR_PTR(-ENOMEM);
121 err = lafs_read_block_async(b);
124 /* Have the block, so safe to iget */
125 ino = iget5_locked(sb, inum,
136 if (test_and_set_bit(B_Async, &b->b.flags)) {
137 putdref(b, MKREF(iget));
/* B_Async was already set: hand the iget ref over as the async ref */
140 getdref(b, MKREF(async));
144 ino = iget5_locked(sb, inum, sync_itest, iset, &ik);
147 putdref(b, MKREF(iget));
148 return ERR_PTR(-ENOMEM);
151 if (!(ino->i_state & I_NEW)) {
152 putdref(b, MKREF(iget));
154 check_atime_ref(ino, async);
158 return ERR_PTR(-ENOENT);
/* hold a reference on the containing filesystem for the life of ino */
161 igrab(LAFSI(ino)->filesys);
163 /* surprisingly the inode bdi does not default to the
164 * super_blocks bdi...
166 ino->i_data.backing_dev_info = sb->s_bdi;
167 /* Need to load block 'inum' from an inode file...
170 b = lafs_get_block(fsys, inum, NULL, GFP_KERNEL, MKREF(iget));
174 err = lafs_read_block(b);
179 oldino = rcu_my_inode(b);
181 /* The inode is new, but the block thinks it has an
182 * old inode, so we must be in the process of destroying
184 * So fail the lookup without even looking at the content
185 * of the block (Which might not be clear yet).
187 spin_lock(&oldino->i_data.private_lock);
188 if (!test_bit(I_Deleting, &LAFSI(oldino)->iflags)) {
190 LAFSI(oldino)->dblock = NULL;
191 LAFSI(oldino)->iblock = NULL;
193 spin_unlock(&oldino->i_data.private_lock);
201 err = lafs_import_inode(ino, b);
204 printk("lafs_import_inode failed %d\n", err);
207 check_atime_ref(ino, async);
208 unlock_new_inode(ino);
210 if (b && test_and_clear_bit(B_Async, &b->b.flags)) {
211 putdref(b, MKREF(async));
212 lafs_wake_thread(fs_from_inode(fsys));
214 putdref(b, MKREF(iget));
218 unlock_new_inode(ino);
/* Look up inode 'inum' in subordinate filesystem 'fsnum' of 'fs'.
 * fsnum != 0 means a subset filesystem: first iget the filesystem's
 * root inode (which must be a TypeInodeFile), then iget inum within
 * it.  fsnum == 0 uses the main root (fs->ss[0].root) directly.
 * An s_active reference is taken on the superblock in either case.
 * NOTE(review): error paths and the function's return are outside
 * this fragment.
 */
225 lafs_iget_fs(struct fs *fs, int fsnum, int inum, int async)
227 struct super_block *sb;
233 /* Need to locate or load the superblock for this
234 * subordinate filesystem
236 struct inode *filesys;
238 filesys = lafs_iget(fs->ss[0].root, fsnum, async);
241 if (LAFSI(filesys)->type != TypeInodeFile) {
243 return ERR_PTR(-ENOENT);
245 rv = lafs_iget(filesys, inum, async);
247 rv = lafs_iget(fs->ss[0].root, inum, async);
249 atomic_inc(&sb->s_active);
251 rv = igrab(fs->ss[0].root);
252 atomic_inc(&sb->s_active);
/* Fill in the in-memory inode 'ino' from the on-disk la_inode stored
 * in datablock 'b'.  Decodes the common fields (counts, generation,
 * flags, type, metadata size, depth) and then the type-specific
 * metadata union: filesystem, inode-map, segment-usage, quota,
 * orphan, and the TypeBase file/dir/symlink/special variants.
 * The dblock is mapped for the duration (unmapped briefly to kmalloc
 * the fs name, since mapping makes us atomic), and b->my_inode is
 * pointed at ino on success.
 * NOTE(review): case labels, error paths and returns are not all
 * visible in this fragment.
 */
258 lafs_import_inode(struct inode *ino, struct datablock *b)
260 struct la_inode *lai = map_dblock(b);
261 struct lafs_inode *li = LAFSI(ino);
264 if (lai->filetype == 0) {
/* filetype 0: a 'special' placeholder inode */
271 ino->i_mode = S_IFREG;
272 ino->i_nlink = 1; /* For special file, set nlink so they
273 * never appear unlinked */
277 LAFS_BUG(ino->i_ino != b->b.fileaddr, &b->b);
278 li->cblocks = le32_to_cpu(lai->data_blocks);
279 li->pblocks = li->ablocks = 0;
280 li->vfs_inode.i_blocks = ((blkcnt_t)li->cblocks
281 << (ino->i_sb->s_blocksize_bits - 9));
282 li->ciblocks = le32_to_cpu(lai->index_blocks);
286 ino->i_generation = le16_to_cpu(lai->generation);
287 li->trunc_gen = lai->trunc_gen;
288 li->flags = lai->flags;
289 li->type = lai->filetype;
290 li->metadata_size = le16_to_cpu(lai->metadata_size);
291 li->depth = lai->depth;
293 dprintk("inode %lu type is %d\n", (unsigned long)ino->i_ino, li->type);
295 ino->i_data.a_ops = &lafs_file_aops;
/* TypeInodeFile: this inode is itself a (sub)filesystem */
301 struct fs_md *i = &li->md.fs;
302 struct fs_metadata *l = &lai->metadata[0].fs;
305 i->usagetable = le16_to_cpu(l->snapshot_usage_table);
306 decode_time(&ino->i_mtime, le64_to_cpu(l->update_time));
307 i->cblocks_used = le64_to_cpu(l->blocks_used);
308 i->pblocks_used = i->ablocks_used = 0;
309 i->blocks_allowed = le64_to_cpu(l->blocks_allowed);
310 i->blocks_unalloc = 0;
311 i->creation_age = le64_to_cpu(l->creation_age);
312 i->inodes_used = le32_to_cpu(l->inodes_used);
313 i->parent = le32_to_cpu(l->parent);
314 i->quota_inums[0] = le32_to_cpu(l->quota_inodes[0]);
315 i->quota_inums[1] = le32_to_cpu(l->quota_inodes[1]);
316 i->quota_inums[2] = le32_to_cpu(l->quota_inodes[2]);
317 i->quota_inodes[0] = i->quota_inodes[1]
318 = i->quota_inodes[2] = NULL;
319 nlen = li->metadata_size - offsetof(struct la_inode,
320 metadata[0].fs.name);
321 i->accesstime = NULL;
327 /* Need to unmap the dblock to kmalloc because
328 * the mapping makes us 'atomic'
330 unmap_dblock(b, lai);
331 i->name = kmalloc(nlen+1, GFP_KERNEL);
/* remap and re-take the metadata pointer after kmalloc */
333 l = &lai->metadata[0].fs;
338 memcpy(i->name, l->name, nlen);
341 /* Make this look like a directory */
342 ino->i_mode = S_IFDIR;
346 ino->i_op = &lafs_subset_ino_operations;
347 ino->i_fop = &lafs_subset_file_operations;
353 struct inodemap_md *m = &li->md.inodemap;
354 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
355 m->size = le32_to_cpu(s->size);
356 m->thisblock = NoBlock;
363 struct su_md *m = &li->md.segmentusage;
364 struct su_metadata *s = &lai->metadata[0].segmentusage;
365 m->table_size = le32_to_cpu(s->table_size);
371 struct quota_md *m = &li->md.quota;
372 struct quota_metadata *s = &lai->metadata[0].quota;
373 m->gracetime = le32_to_cpu(s->gracetime);
374 m->graceunits = le32_to_cpu(s->graceunits);
379 struct orphan_md *m = &li->md.orphan;
380 /* This will be set via lafs_count_orphans */
388 default: /* TypeBase or larger */
390 struct file_md *i = &li->md.file;
391 struct file_metadata *l = &lai->metadata[0].file;
392 struct dir_metadata *d = &lai->metadata[0].dir;
393 struct special_metadata *s = &lai->metadata[0].special;
395 if (li->type < TypeBase)
397 i->flags = le16_to_cpu(l->flags);
398 ino->i_mode = le16_to_cpu(l->mode);
399 ino->i_uid = le32_to_cpu(l->userid);
400 ino->i_gid = le32_to_cpu(l->groupid);
401 i->treeid = le32_to_cpu(l->treeid);
402 i->creationtime = le64_to_cpu(l->creationtime);
403 decode_time(&ino->i_mtime, le64_to_cpu(l->modifytime));
404 decode_time(&ino->i_ctime, le64_to_cpu(l->ctime));
405 decode_time(&i->i_accesstime, le64_to_cpu(l->accesstime));
406 ino->i_atime = i->i_accesstime;
407 i->atime_offset = 0; /* Will be filled-in later probably */
408 lafs_add_atime_offset(&ino->i_atime, i->atime_offset);
409 ino->i_size = le64_to_cpu(l->size);
410 i->parent = le32_to_cpu(l->parent);
411 ino->i_nlink = le32_to_cpu(l->linkcount);
412 if (ino->i_nlink == 0 && list_empty(&b->orphans) &&
413 fs_from_inode(ino)->rolled) {
414 /* This block should already be on the orphan
415 * list, otherwise there is a filesystem
417 * Either the orphan file is wrong, or the
418 * linkcount is wrong.
419 * It is safest to assume the latter - either
420 * way an FS check would be needed to fix it.
421 * Note: while roll-forward is happening, this
422 * situation is perfectly possible and is handled
425 /* FIXME set a superblock flag requesting
426 * directory linkage checking
431 dprintk(" mode = 0%o uid %d size %lld\n",
432 ino->i_mode, ino->i_uid, ino->i_size);
435 ino->i_op = &lafs_file_ino_operations;
436 ino->i_fop = &lafs_file_file_operations;
437 ino->i_mode = (ino->i_mode & 07777) | S_IFREG;
440 i->seed = le32_to_cpu(d->hash_seed);
441 ino->i_op = &lafs_dir_ino_operations;
442 ino->i_fop = &lafs_dir_file_operations;
443 ino->i_mode = (ino->i_mode & 07777) | S_IFDIR;
446 dprintk("Hmm. %d %d %d\n",
453 ino->i_op = &lafs_link_ino_operations;
454 ino->i_mode = (ino->i_mode & 07777) | S_IFLNK;
457 /* the data had better be in the inode ... */
458 ino->i_rdev = MKDEV(le32_to_cpu(s->major),
459 le32_to_cpu(s->minor));
460 ino->i_op = &lafs_special_ino_operations;
461 init_special_inode(ino, ino->i_mode, ino->i_rdev);
468 ino->i_blkbits = ino->i_sb->s_blocksize_bits;
469 /* FIXME i_blocks and i_byte - used for quota?? */
472 /* Note: no refcount yet. Either will remove the reference to the
476 rcu_assign_pointer(b->my_inode, ino);
480 printk("inode %lu type is %d\n",
481 (unsigned long)ino->i_ino, li->type);
482 unmap_dblock(b, lai);
/* Ensure this inode holds the atime-file reference it should:
 * if the filesystem has an 'accesstime' file, load the block of it
 * covering this inode (2 bytes per inode), record the stored atime
 * offset, apply it to i_atime, and set I_AccessTime.  Skipped for
 * async lookups, for non-TypeBase inodes, and when the reference is
 * already held.
 * NOTE(review): early-return lines are not all visible here.
 */
486 static void check_atime_ref(struct inode *ino, int async)
488 /* If there is an atime file in this filesystem the inode
489 * should hold a reference to the relevant block in
492 struct inode *root, *at;
496 /* Never bother for async lookups */
498 if (LAFSI(ino)->type < TypeBase)
500 if (test_bit(I_AccessTime, &LAFSI(ino)->iflags))
502 root = LAFSI(ino)->filesys;
503 at = LAFSI(root)->md.fs.accesstime;
507 if (LAFSI(ino)->md.file.atime_offset)
508 LAFSI(ino)->md.file.atime_offset = 0;
510 /* "* 2" to get byte number, then shift to get block
511 * number. So just shift by 1 less than blkbits.
513 bnum = ino->i_ino >> (at->i_blkbits-1);
514 b = lafs_get_block(at, bnum, NULL, GFP_NOFS, MKREF(atime));
516 if (lafs_read_block(b) == 0) {
520 i = (ino->i_ino * 2) & ((1<<at->i_blkbits)-1);
521 LAFSI(ino)->md.file.atime_offset = le16_to_cpu(atp[i]);
522 set_bit(I_AccessTime, &LAFSI(ino)->iflags);
523 unmap_dblock(b, atp);
524 lafs_add_atime_offset(&ino->i_atime,
525 LAFSI(ino)->md.file.atime_offset);
527 putdref(b, MKREF(atime));
/* Apply a packed 16-bit atime delta to *atime.  The encoding is a
 * 5-bit exponent (low bits) and an 11-bit mantissa with an implicit
 * leading 1 (except when the exponent is 0): large exponents add
 * whole seconds, small ones add milliseconds via timespec_add_ns.
 * NOTE(review): the exponent-range branches are partly outside this
 * fragment.
 */
531 void lafs_add_atime_offset(struct timespec *atime, int offset)
538 expon = offset & 0x1f;
540 mantissa = (offset >> 5) | 0x800;
542 mantissa = (offset >> 5);
545 mantissa <<= expon-11;
546 atime->tv_sec += mantissa;
550 mantissa <<= expon-1;
551 timespec_add_ns(atime, (s64)mantissa * 1000000);
/* Shift *mantissa right until it fits in 12 bits, returning the
 * number of shifts (the exponent).  Callers only use this on values
 * >= 2048, so the top retained bit is always 1 and only 11 bits
 * need to be stored explicitly.
 */
555 static int normalise(int *mantissa)
557 /* Shift down until value can be stored in 12 bits:
558 * Top bit will be '1', so only 11 bits needed.
559 * Not used on values below 2048.
562 while (*mantissa >= 4096) {
/* Recompute the packed atime delta (i_atime minus the stored
 * i_accesstime) in the exponent/mantissa format used by the atime
 * file: deltas >= 2048s store seconds (exponent biased by 11),
 * smaller ones store milliseconds.  Negative deltas become zero.
 * Returns whether the stored atime_offset changed.
 * NOTE(review): the return statements are outside this fragment.
 */
569 static int update_atime_delta(struct inode *ino)
571 /* calculate new delta to show the difference between
572 * i_atime and i_accesstime
575 if (LAFSI(ino)->type < TypeBase)
577 if (timespec_compare(&ino->i_atime,
578 &LAFSI(ino)->md.file.i_accesstime) <= 0) {
579 /* We cannot store negative delta so if i_atime is in the
580 * past, just store zero
584 struct timespec diff;
587 diff = timespec_sub(ino->i_atime,
588 LAFSI(ino)->md.file.i_accesstime);
589 if (diff.tv_sec >= 2048) {
590 /* Just store the seconds */
592 shift = normalise(&rv) + 11;
594 /* Store the milliseconds */
595 int rv = diff.tv_nsec / 1000000;
596 rv += diff.tv_sec * 1000;
598 shift = normalise(&rv) + 1;
610 if (LAFSI(ino)->md.file.atime_offset == rv)
613 LAFSI(ino)->md.file.atime_offset = rv;
/* Write the inode's current atime_offset into its slot (2 bytes per
 * inode) in the filesystem's accesstime file, dirtying the block if
 * the stored value changed.  Requires I_AccessTime: we already hold
 * a reference so the block lookup must succeed.  Space reservation
 * needs the checkpoint lock held.
 */
617 static void store_atime_delta(struct inode *ino)
625 if (!test_bit(I_AccessTime, &LAFSI(ino)->iflags))
626 /* sorry, nothing we can do here */
629 /* We own a reference, so this lookup must succeed */
630 at = LAFSI(LAFSI(ino)->filesys)->md.fs.accesstime;
631 bnum = ino->i_ino >> (at->i_blkbits-1);
632 b = lafs_get_block(at, bnum, NULL, GFP_NOFS, MKREF(store_atime));
635 i = (ino->i_ino * 2) & ((1<<at->i_blkbits)-1);
636 if (le16_to_cpu(atp[i]) != LAFSI(ino)->md.file.atime_offset) {
637 atp[i] = cpu_to_le16(LAFSI(ino)->md.file.atime_offset);
638 /* FIXME - I could lose an update here - do I care? */
639 /* Can only reserve NewSpace with checkpoint locked... */
640 lafs_checkpoint_lock(fs_from_inode(ino));
641 if (lafs_reserve_block(&b->b, NewSpace) == 0)
642 lafs_dirty_dblock(b);
643 lafs_checkpoint_unlock(fs_from_inode(ino));
645 unmap_dblock(b, atp);
646 putdref(b, MKREF(store_atime));
/* Keep the I_Pinned flag (and the inode reference it represents)
 * consistent with reality: it should be set exactly when i_nlink is
 * non-zero and ->iblock is B_Pinned.  Called after any change to
 * i_nlink or to the pinned state of the InoIdx block.
 * NOTE(review): the iput/igrab calls guarded by the s_type checks
 * are outside this fragment.
 */
649 void lafs_inode_checkpin(struct inode *ino)
651 /* Make sure I_Pinned is set correctly.
652 * It should be set precisely if i_nlink is non-zero,
653 * and ->iblock is B_Pinned.
654 * When it is set, we own a reference to the inode.
656 * This needs to be called whenever we change
657 * i_nlink, and whenever we pin or unpin an InoIdx
660 if (ino->i_nlink == 0) {
661 /* I_Pinned should not be set */
662 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags)) {
663 if (ino->i_sb->s_type == &lafs_fs_type)
669 /* Need to check if iblock is Pinned. */
670 struct indexblock *ib = NULL;
671 if (LAFSI(ino)->iblock) {
672 spin_lock(&ino->i_data.private_lock);
673 ib = LAFSI(ino)->iblock;
674 if (ib && !test_bit(B_Pinned, &ib->b.flags))
676 spin_unlock(&ino->i_data.private_lock);
679 if (!test_and_set_bit(I_Pinned, &LAFSI(ino)->iflags)) {
680 if (ino->i_sb->s_type == &lafs_fs_type)
686 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags)) {
687 if (ino->i_sb->s_type == &lafs_fs_type)
/* Return the inode's cached dblock (or NULL) with a reference taken.
 * The reference must be taken under the private_lock of the inode
 * that owns the block; when the dblock belongs to a different inode
 * (the inode-file), take that inode's lock nested inside ours.
 */
696 struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG)
698 struct datablock *db;
700 spin_lock(&ino->i_data.private_lock);
701 db = LAFSI(ino)->dblock;
703 if (db->b.inode == ino)
704 getdref_locked(db, REF);
706 spin_lock_nested(&db->b.inode->i_data.private_lock, 1);
707 getdref_locked(db, REF);
708 spin_unlock(&db->b.inode->i_data.private_lock);
711 spin_unlock(&ino->i_data.private_lock);
/* Return the inode's dblock, creating and reading it if it is not
 * cached: look up block i_ino of the containing inode-file, link it
 * as this inode's dblock, and read it (async or sync per 'async').
 * Returns ERR_PTR(-ENOMEM) if the block cannot be obtained.
 */
715 struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
717 struct datablock *db;
720 db = lafs_inode_get_dblock(ino, REF);
723 db = lafs_get_block(LAFSI(ino)->filesys, ino->i_ino, NULL,
726 return ERR_PTR(-ENOMEM);
728 LAFSI(ino)->dblock = db;
729 rcu_assign_pointer(db->my_inode, ino);
731 err = lafs_read_block_async(db);
733 err = lafs_read_block(db);
/* Initialise a freshly allocated block in an inode file so that a
 * later iget/lafs_import_inode sees a valid new inode of 'type' with
 * 'mode', parented in 'dir'.  Fills the common la_inode header, the
 * type-specific metadata, zeroes the rest of the block, plants the
 * initial index structure marker, and dirties the (pinned) block.
 */
741 void lafs_inode_init(struct datablock *b, int type, int mode, struct inode *dir)
743 /* A new block has been allocated in an inode file to hold an
744 * inode. We get to fill in initial values so that when
745 * 'iget' calls lafs_import_inode, the correct inode is
748 struct fs *fs = fs_from_inode(b->b.inode);
749 struct la_inode *lai = map_dblock(b);
752 lai->data_blocks = cpu_to_le32(0);
753 lai->index_blocks = cpu_to_le32(0);
754 get_random_bytes(&lai->generation, sizeof(lai->generation));
757 lai->filetype = type;
763 struct fs_metadata *l = &lai->metadata[0].fs;
764 size = sizeof(struct fs_metadata);
767 l->blocks_allowed = 0;
768 l->creation_age = fs->wc[0].cluster_seq;
771 l->quota_inodes[0] = 0;
772 l->quota_inodes[1] = 0;
773 l->quota_inodes[2] = 0;
774 l->snapshot_usage_table = 0;
776 /* name will be zero length and not used */
781 struct inodemap_metadata *l = &lai->metadata[0].inodemap;
783 size = sizeof(struct inodemap_metadata);
787 size = sizeof(struct su_metadata);
790 size = sizeof(struct quota_metadata);
800 struct file_metadata *l = &lai->metadata[0].file;
801 struct timespec now = CURRENT_TIME;
803 l->flags = cpu_to_le16(0);
804 l->userid = cpu_to_le32(current->cred->fsuid);
805 if (dir && (dir->i_mode & S_ISGID)) {
/* setgid directory: new inode inherits the directory's group */
806 l->groupid = cpu_to_le32(dir->i_gid);
810 l->groupid = cpu_to_le32(current->cred->fsgid);
811 if (dir && LAFSI(dir)->md.file.treeid)
812 l->treeid = cpu_to_le32(LAFSI(dir)->md.file.treeid);
814 l->treeid = l->userid;
816 l->mode = cpu_to_le16(mode);
817 l->creationtime = encode_time(&now);
818 l->modifytime = l->creationtime;
819 l->ctime = l->creationtime;
820 l->accesstime = l->creationtime;
822 l->parent = dir ? cpu_to_le32(dir->i_ino) : 0;
825 if (type == TypeDir) {
826 struct dir_metadata *l = &lai->metadata[0].dir;
828 get_random_bytes(&seed,
/* force low bits: clear bottom three, set bit 0 */
830 seed = (seed & ~7) | 1;
831 l->hash_seed = cpu_to_le32(seed);
832 size = sizeof(struct dir_metadata);
833 } else if (type == TypeSpecial) {
834 struct special_metadata *s = &lai->metadata[0].special;
835 s->major = s->minor = 0;
836 size = sizeof(struct special_metadata);
838 size = sizeof(struct file_metadata);
841 size += sizeof(struct la_inode);
842 lai->metadata_size = cpu_to_le32(size);
843 memset(((char *)lai)+size, 0, fs->blocksize-size);
844 *(u16 *)(((char *)lai)+size) = cpu_to_le16(IBLK_EXTENT);
846 unmap_dblock(b, lai);
847 set_bit(B_Valid, &b->b.flags);
848 LAFS_BUG(!test_bit(B_Pinned, &b->b.flags), &b->b);
849 lafs_dirty_dblock(b);
852 static int inode_map_free(struct fs *fs, struct inode *fsys, u32 inum);
/* VFS ->evict_inode: final teardown of an in-memory inode.  If
 * i_nlink is zero the on-disk inode is deleted: truncate to zero,
 * mark I_Deleting/I_Trunc, claim the dblock and add it as an orphan
 * so the orphan handler finishes the job, then free the inum in the
 * inode map.  Otherwise just drop the pages and (unless deleting)
 * break the inode<->dblock/iblock linkage under private_lock.
 * NOTE(review): end_writeback and several guard lines are outside
 * this fragment.
 */
854 void lafs_evict_inode(struct inode *ino)
856 struct fs *fs = fs_from_inode(ino);
857 struct lafs_inode *li = LAFSI(ino);
859 if (ino->i_mode == 0) {
860 /* There never was an inode here,
862 * We just call end_writeback to get the
863 * flags set properly.
869 dprintk("EVICT INODE %d\n", (int)ino->i_ino);
872 /* Normal truncation holds an igrab, so we cannot be
873 * deleted until any truncation finishes
875 BUG_ON(test_bit(I_Trunc, &LAFSI(ino)->iflags));
877 if (ino->i_nlink == 0) {
878 struct datablock *b =
879 lafs_inode_dblock(ino, SYNC, MKREF(delete_inode));
880 i_size_write(ino, 0);
881 truncate_inode_pages(&ino->i_data, 0);
882 LAFSI(ino)->trunc_next = 0;
883 set_bit(I_Deleting, &LAFSI(ino)->iflags);
884 set_bit(I_Trunc, &LAFSI(ino)->iflags);
887 set_bit(B_Claimed, &b->b.flags);
888 lafs_add_orphan(fs, b);
889 dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr);
890 putdref(b, MKREF(delete_inode));
892 inode_map_free(fs, LAFSI(ino)->filesys, ino->i_ino);
894 truncate_inode_pages(&ino->i_data, 0);
897 dprintk("CLEAR INODE %d\n", (int)ino->i_ino);
901 /* Now is a good time to break the linkage between
902 * inode and dblock - but not if the file is
905 if (!test_bit(I_Deleting, &li->iflags)) {
906 struct datablock *db;
907 spin_lock(&ino->i_data.private_lock);
910 struct indexblock *ib = li->iblock;
911 LAFS_BUG(ib && atomic_read(&ib->b.refcnt), &db->b);
916 spin_unlock(&ino->i_data.private_lock);
919 /* FIXME release quota inodes if filesystem */
/* lafs_walk_leaf_index callback used when a whole index block is
 * being pruned: account every allocated block address away (usage
 * goes to zero); the index block itself is cleared afterwards by the
 * caller.
 */
922 static int prune(void *data, u32 addr, u64 paddr, int len)
924 /* This whole index block is being pruned, just account
925 * for everything and it will be cleared afterwards
927 struct indexblock *ib = data;
928 struct inode *ino = ib->b.inode;
929 struct fs *fs = fs_from_inode(ino);
930 int ph = !!test_bit(B_Phase1, &ib->b.flags);
932 dprintk("PRUNE %d for %d at %lld\n", addr, len, (long long)paddr);
933 if (paddr == 0 || len == 0)
935 for (i = 0 ; i < len ; i++)
936 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
/* lafs_walk_leaf_index callback used when only part of an index
 * block (addresses >= trunc_next) is being pruned: queue each doomed
 * address into the block's uninc_table so incorporation will remove
 * it, and account the space away.  Stops early (outside this
 * fragment) when the pending table fills.
 */
940 static int prune_some(void *data, u32 addr, u64 paddr, int len)
942 /* Part of this index block is being pruned. Copy
943 * what addresses we can into uninc_table so that
944 * it can be 'incorporated'
945 * We should probably share some code with
946 * lafs_allocated_block??
948 struct indexblock *ib = data;
949 struct inode *ino = ib->b.inode;
950 struct fs *fs = fs_from_inode(ino);
951 int ph = !!test_bit(B_Phase1, &ib->b.flags);
954 if (paddr == 0 || len == 0)
956 dprintk("PRUNE2 %d for %d at %lld\n", addr, len, (long long)paddr);
957 for (i = 0 ; i < len ; i++) {
958 /* FIXME should allow longer truncation ranges in uninc_table
959 * as they are easy to handle.
962 if (addr + i < LAFSI(ino)->trunc_next)
964 spin_lock(&ino->i_data.private_lock);
965 a = &ib->uninc_table.pending_addr
966 [ib->uninc_table.pending_cnt - 1];
967 if (ib->uninc_table.pending_cnt <
968 ARRAY_SIZE(ib->uninc_table.pending_addr)) {
970 a->fileaddr = addr + i;
973 LAFS_BUG(!test_bit(B_Pinned, &ib->b.flags), &ib->b);
974 ib->uninc_table.pending_cnt++;
976 spin_unlock(&ino->i_data.private_lock);
979 spin_unlock(&ino->i_data.private_lock);
980 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
/* Orphan-list work function for an inode's datablock: performs the
 * incremental guts of truncate/delete.  Each call makes bounded
 * progress: when I_Trunc is clear it finishes the delete (erase the
 * dblock) or releases/forgets the orphan; otherwise it finds the
 * next leaf index block at trunc_next and prunes it (wholly via
 * prune(), or partially via prune_some() + incorporation), advancing
 * trunc_next until 0xFFFFFFFF marks completion.  Returns 0 or a
 * -ve errno such as -EAGAIN/-EBUSY to be retried later.
 * NOTE(review): heavily fragmented view - labels, several exits and
 * unlock paths are not visible; statement order is load-bearing here.
 */
985 int lafs_inode_handle_orphan(struct datablock *b)
987 /* Don't need rcu protection for my_inode run_orphan
990 struct indexblock *ib, *ib2;
991 struct inode *ino = b->my_inode;
992 struct fs *fs = fs_from_inode(ino);
993 u32 trunc_next, next_trunc;
997 if (!test_bit(I_Trunc, &LAFSI(ino)->iflags)) {
998 if (test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
999 LAFS_BUG(ino->i_nlink, &b->b);
1000 if (LAFSI(ino)->cblocks +
1001 LAFSI(ino)->pblocks +
1002 LAFSI(ino)->ablocks +
1003 LAFSI(ino)->ciblocks +
1004 LAFSI(ino)->piblocks)
1005 printk("Deleting inode %lu: %ld+%ld+%ld %ld+%ld\n",
1007 LAFSI(ino)->cblocks,
1008 LAFSI(ino)->pblocks,
1009 LAFSI(ino)->ablocks,
1010 LAFSI(ino)->ciblocks,
1011 LAFSI(ino)->piblocks);
1012 BUG_ON(LAFSI(ino)->cblocks +
1013 LAFSI(ino)->pblocks +
1014 LAFSI(ino)->ablocks +
1015 LAFSI(ino)->ciblocks +
1016 LAFSI(ino)->piblocks);
1017 if (lafs_erase_dblock_async(b))
1018 lafs_orphan_release(fs, b);
1019 } else if (ino->i_nlink || LAFSI(ino)->type == 0)
1020 lafs_orphan_release(fs, b);
1022 lafs_orphan_forget(fs, b);
1026 ib = lafs_make_iblock(ino, ADOPT, SYNC, MKREF(inode_handle_orphan));
1030 /* Here is the guts of 'truncate'. We find the next leaf index
1031 * block and discard all the addresses there-in.
1033 trunc_next = LAFSI(ino)->trunc_next;
1035 if (trunc_next == 0xFFFFFFFF) {
1036 /* truncate has finished in that all data blocks
1037 * have been removed and all index block are either
1038 * gone or pending incorporation at which point they will
1040 * If we hit a phase change, we will need to postpone
1041 * the rest of the cleaning until it completes.
1042 * If there is a checkpoint happening, then all the work
1043 * that we can do now, it will do for us. So just
1046 struct indexblock *tmp;
1047 struct indexblock *next;
1050 if (!test_bit(B_Pinned, &ib->b.flags)) {
1051 /* must be finished */
1052 LAFS_BUG(test_bit(B_Dirty, &ib->b.flags), &ib->b);
1053 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1055 wake_up(&fs->trunc_wait);
1059 if (fs->checkpointing) {
1060 /* This cannot happen with current code,
1061 * but leave it in case we ever have
1062 * orphan handling parallel with checkpoints
1064 err = -EBUSY; /* Try again after the checkpoint */
1068 lastaddr = (i_size_read(ino) +
1070 >> fs->blocksize_bits;
1071 /* Find a Pinned descendent of ib which has no
1072 * Pinned descendents and no PrimaryRef dependent
1073 * (so take the last).
1074 * Prefer blocks that are beyond EOF (again, take the last).
1075 * If there are none, descend the last block that
1076 * is not after EOF and look at its children.
1079 spin_lock(&ib->b.inode->i_data.private_lock);
1083 list_for_each_entry(tmp, &ib2->children, b.siblings) {
1084 if (!test_bit(B_Index, &tmp->b.flags) ||
1085 !test_bit(B_Pinned, &tmp->b.flags))
1088 tmp->b.fileaddr > next->b.fileaddr)
1092 if (ib2->b.fileaddr < lastaddr) {
1093 /* Must be all done */
1094 spin_unlock(&ib->b.inode->i_data.private_lock);
1095 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1097 wake_up(&fs->trunc_wait);
1101 getiref(ib2, MKREF(inode_handle_orphan2));
1102 spin_unlock(&ib->b.inode->i_data.private_lock);
1104 /* ib2 is an index block beyond EOF with no
1106 * Incorporating it should unpin it.
1108 if (!list_empty(&ib2->children)) {
1109 lafs_print_tree(&ib2->b, 3);
1110 LAFS_BUG(1, &ib2->b);
1113 if (!lafs_iolock_written_async(&ib2->b)) {
1114 putiref(ib2, MKREF(inode_handle_orphan2));
1118 while (ib2->uninc_table.pending_cnt || ib2->uninc)
1119 lafs_incorporate(fs, ib2);
1121 if (test_bit(B_Dirty, &ib2->b.flags) ||
1122 test_bit(B_Realloc, &ib2->b.flags))
1123 lafs_cluster_allocate(&ib2->b, 0);
1125 lafs_iounlock_block(&ib2->b);
1127 if (!list_empty(&ib2->b.siblings)) {
1128 printk("looping on %s\n", strblk(&ib2->b));
1133 putiref(ib2, MKREF(inode_handle_orphan2));
1136 if (lafs_iolock_written_async(&ib->b)) {
1138 lafs_incorporate(fs, ib);
1139 lafs_iounlock_block(&ib->b);
1144 putiref(ib, MKREF(inode_handle_orphan));
1148 putiref(ib, MKREF(inode_handle_orphan));
1150 ib = lafs_leaf_find(ino, trunc_next, ADOPT, &next_trunc,
1151 ASYNC, MKREF(inode_handle_orphan3));
1154 /* now hold an iolock on ib */
1156 /* Ok, trunc_next seems to refer to a block that exists.
1157 * We need to erase it..
1159 * So we open up the index block ourselves, call
1160 * lafs_summary_update with each block address, and then
1164 if (LAFSI(ino)->depth == 0) {
1165 /* Nothing to truncate */
1166 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1168 if (test_bit(B_Pinned, &ib->b.flags))
1169 /* Need to move the dirtiness which keeps this
1170 * pinned to the data block.
1172 lafs_cluster_allocate(&ib->b, 0);
1174 lafs_iounlock_block(&ib->b);
1179 lafs_checkpoint_lock(fs);
1180 err = lafs_reserve_block(&ib->b, ReleaseSpace);
1184 if (!test_bit(B_Valid, &ib->b.flags) &&
1185 test_bit(B_InoIdx, &ib->b.flags)) {
1186 /* still invalid, just re-erase to remove
1188 LAFSI(ino)->trunc_next = next_trunc;
1189 lafs_cluster_allocate(&ib->b, 0);
1194 lafs_pin_block(&ib->b);
1196 /* It might be that this can happen, in which case
1197 * we simply update trunc_next and loop. But I'd like
1198 * to be sure before I implement that
1200 if (!test_bit(B_Valid, &ib->b.flags)) {
1201 printk("Not Valid: %s\n", strblk(&ib->b));
1202 printk("depth = %d\n", LAFSI(ino)->depth);
1203 if (test_bit(B_InoIdx, &ib->b.flags))
1204 printk("DB: %s\n", strblk(&LAFSI(ib->b.inode)->dblock->b));
1205 LAFSI(ino)->trunc_next = next_trunc;
1206 //BUG_ON(!test_bit(B_Valid, &ib->b.flags));
1211 if (ib->b.fileaddr < trunc_next &&
1212 lafs_leaf_next(ib, 0) < trunc_next) {
1213 /* We only want to truncate part of this index block.
1214 * So we copy addresses into uninc_table and then
1215 * call lafs_incorporate.
1216 * This might cause the index tree to grow, so we
1217 * cannot trust next_trunc
1219 if (ib->uninc_table.pending_cnt == 0 &&
1220 ib->uninc == NULL) {
1221 lafs_dirty_iblock(ib, 0);
1222 /* FIXME this just removes 8 blocks at a time,
1223 * which is not enough
1225 lafs_walk_leaf_index(ib, prune_some, ib);
1227 if (test_bit(B_Dirty, &ib->b.flags))
1228 lafs_incorporate(fs, ib);
1232 LAFSI(ino)->trunc_next = next_trunc;
1234 while (ib->uninc_table.pending_cnt || ib->uninc) {
1235 /* There should be no Realloc data blocks here
1236 * but index blocks might be realloc still.
1238 LAFS_BUG(!test_bit(B_Dirty, &ib->b.flags) &&
1239 !test_bit(B_Realloc, &ib->b.flags), &ib->b);
1240 lafs_incorporate(fs, ib);
1242 if (test_bit(B_InoIdx, &ib->b.flags) ||
1243 !test_bit(B_PhysValid, &ib->b.flags) ||
1244 ib->b.physaddr != 0) {
1245 lafs_walk_leaf_index(ib, prune, ib);
1246 lafs_clear_index(ib);
1247 lafs_dirty_iblock(ib, 0);
1249 if (test_bit(B_Dirty, &ib->b.flags))
1250 lafs_incorporate(fs, ib);
1251 if (!list_empty(&ib->children))
1252 lafs_print_tree(&ib->b, 2);
1253 LAFS_BUG(!list_empty(&ib->children), &ib->b);
1256 lafs_iounlock_block(&ib->b);
1258 lafs_checkpoint_unlock(fs);
1260 putiref(ib, MKREF(inode_handle_orphan3));
/* VFS ->dirty_inode.  If the dblock exists and is Pinned we can
 * store the inode (including an exact atime) safely: set I_Dirty,
 * mark the superblock dirty, sync i_atime into the stored
 * i_accesstime, zero any atime delta, and also bump the filesystem
 * root's mtime.  If not pinned, only the atime delta in the atime
 * file is updated - we must not touch a dblock that may be in
 * writeout.
 */
1264 void lafs_dirty_inode(struct inode *ino)
1266 /* this is called in one of three cases:
1267 * 1/ by lafs internally when dblock or iblock is pinned and
1268 * ready to be dirtied
1269 * 2/ by writeout before requesting a write - to update mtime
1270 * 3/ by read to update atime
1272 * We want to handle atime updates carefully as they may not change
1273 * the stored inode itself.
1274 * For all other updates, the inode dblock exists and is pinned.
1275 * In those cases we will be updating the inode and so can store
1276 * the atime exactly.
1277 * For an atime update, the dblock may not exist, or may not be
1278 * Pinned. If it isn't then we don't want to make the inode dirty
1279 * but only want to update the delta stored in the atime file.
1280 * The block for that should already be pinned.
1283 * We mustn't update the data block as it could be in
1284 * writeout and we cannot always wait safely.
1285 * So require that anyone who really cares, dirties the datablock
1286 * or a child themselves.
1287 * When cluster_allocate eventually gets called, it will update
1288 * the datablock from the inode.
1289 * If an update has to wait for the next phase, lock_dblock
1290 * (e.g. in setattr) will do that.
1292 * We also use this opportunity to update the filesystem modify time.
1294 struct timespec now;
1295 struct inode *filesys;
1298 if (LAFSI(ino)->dblock) {
1299 struct datablock *db;
1300 spin_lock(&ino->i_data.private_lock);
1301 db = LAFSI(ino)->dblock;
1302 if (db && test_bit(B_Pinned, &db->b.flags))
1304 spin_unlock(&ino->i_data.private_lock);
1308 if (update_atime_delta(ino))
1309 store_atime_delta(ino);
1313 set_bit(I_Dirty, &LAFSI(ino)->iflags);
1314 ino->i_sb->s_dirt = 1;
1316 if (LAFSI(ino)->type < TypeBase)
1318 LAFSI(ino)->md.file.i_accesstime = ino->i_atime;
1319 if (LAFSI(ino)->md.file.atime_offset) {
1320 LAFSI(ino)->md.file.atime_offset = 0;
1321 store_atime_delta(ino);
1324 now = current_fs_time(ino->i_sb);
1325 filesys = LAFSI(ino)->filesys;
1326 if (!timespec_equal(&filesys->i_mtime, &now)) {
1327 filesys->i_mtime = now;
1328 set_bit(I_Dirty, &LAFSI(filesys)->iflags);
/* fsync support: push any pending inode update into the next write
 * cluster.  update_cluster values: >1 = a cluster to wait for,
 * 1 = wait for a full checkpoint, 0 = nothing pending.  If dirty,
 * fill the dblock from the inode and try to commit it as a cluster
 * update record; if that cannot be pinned, fall back to starting a
 * checkpoint.  Always returns 0 (see FIXME).
 */
1332 int lafs_sync_inode(struct inode *ino, int wait)
1334 /* fsync has been called on this file so we need
1335 * to sync any inode updates to the next cluster.
1337 * If we cannot create an update record,
1338 * we wait for a phase change, which writes everything
1341 struct datablock *b;
1342 struct fs *fs = fs_from_inode(ino);
1343 struct update_handle uh;
1347 if (LAFSI(ino)->update_cluster > 1)
1348 lafs_cluster_wait(fs, LAFSI(ino)->update_cluster);
1349 if (LAFSI(ino)->update_cluster == 1) {
1350 lafs_checkpoint_lock(fs);
1351 lafs_checkpoint_unlock_wait(fs);
1356 LAFSI(ino)->update_cluster = 0;
1357 if (!test_bit(I_Dirty, &LAFSI(ino)->iflags))
1359 b = lafs_inode_dblock(ino, SYNC, MKREF(write_inode));
1363 lafs_iolock_written(&b->b);
1364 lafs_inode_fillblock(ino);
1365 lafs_iounlock_block(&b->b);
1367 err = lafs_cluster_update_prepare(&uh, fs, LAFS_INODE_LOG_SIZE);
1369 lafs_cluster_update_abort(&uh);
1371 lafs_checkpoint_lock(fs);
1372 if (lafs_cluster_update_pin(&uh) == 0) {
1373 if (test_and_clear_bit(B_Dirty, &b->b.flags))
1374 lafs_space_return(fs, 1);
1375 LAFSI(ino)->update_cluster =
1376 lafs_cluster_update_commit
1377 (&uh, b, LAFS_INODE_LOG_START,
1378 LAFS_INODE_LOG_SIZE);
1380 lafs_cluster_update_abort(&uh);
1381 lafs_checkpoint_unlock(fs);
1383 if (test_bit(B_Dirty, &b->b.flags)) {
1384 /* FIXME need to write out the data block...
1385 * Is that just lafs_cluster_allocate ?
1389 if (LAFSI(ino)->update_cluster == 0) {
1390 lafs_checkpoint_lock(fs);
1391 if (test_bit(B_Dirty, &b->b.flags))
1392 LAFSI(ino)->update_cluster = 1;
1393 lafs_checkpoint_start(fs);
1394 lafs_checkpoint_unlock(fs);
1396 putdref(b, MKREF(write_inode));
1397 return 0; /* FIXME should I return some error message??? */
1400 void lafs_inode_fillblock(struct inode *ino)
1402 /* copy data from ino into the related data block */
1404 struct lafs_inode *li = LAFSI(ino);
1405 struct datablock *db = li->dblock;
1406 struct la_inode *lai;
/* The block is about to be made current, so the in-core dirty
 * flag can be cleared before the copy. */
1408 clear_bit(I_Dirty, &LAFSI(ino)->iflags);
1410 lai = map_dblock(db);
/* Fixed header fields, converted to on-disk (little-endian) layout. */
1411 lai->data_blocks = cpu_to_le32(li->cblocks);
1412 lai->index_blocks = cpu_to_le32(li->ciblocks);
1413 lai->generation = cpu_to_le16(ino->i_generation);
1414 lai->trunc_gen = li->trunc_gen;
1415 lai->flags = li->flags;
1416 lai->filetype = li->type;
1417 if (lai->metadata_size != cpu_to_le16(li->metadata_size)) {
1418 /* Changing metadata size is weird.
1419 * We will need to handle this somehow for xattrs
1420 * For now we just want to cope with
1421 * Dir -> InodeFile changes, and that guarantees us
1422 * there is no index info - so just clear the index
1425 u16 *s = (u16*)(((char*)lai) + li->metadata_size);
1426 BUG_ON(li->type != TypeInodeFile);
1427 lai->metadata_size = cpu_to_le16(li->metadata_size);
1428 memset(s, 0, ino->i_sb->s_blocksize - li->metadata_size);
1429 *s = cpu_to_le16(IBLK_INDIRECT);
1431 lai->depth = li->depth;
/* Type-specific metadata follows the header.  The switch/case
 * labels for some types fall between the visible sections below.
 * This section: filesystem-root inode metadata. */
1436 struct fs_md *i = &li->md.fs;
1437 struct fs_metadata *l = &lai->metadata[0].fs;
1440 l->snapshot_usage_table = cpu_to_le16(i->usagetable);
1441 l->update_time = cpu_to_le64(encode_time(&ino->i_mtime));
1442 l->blocks_used = cpu_to_le64(i->cblocks_used);
1443 l->blocks_allowed = cpu_to_le64(i->blocks_allowed);
1444 l->creation_age = cpu_to_le64(i->creation_age);
1445 l->inodes_used = cpu_to_le32(i->inodes_used);
1446 l->parent = cpu_to_le32(i->parent);
1447 l->quota_inodes[0] = cpu_to_le32(i->quota_inums[0]);
1448 l->quota_inodes[1] = cpu_to_le32(i->quota_inums[1]);
1449 l->quota_inodes[2] = cpu_to_le32(i->quota_inums[2]);
/* The fileset name occupies the remainder of the metadata area;
 * zero-fill then copy at most nlen bytes (no NUL guarantee implied). */
1450 nlen = lai->metadata_size - offsetof(struct la_inode,
1451 metadata[0].fs.name);
1452 memset(l->name, 0, nlen);
1453 if (i->name == NULL)
1455 else if (strlen(i->name) < nlen)
1456 nlen = strlen(i->name);
1457 memcpy(l->name, i->name, nlen);
/* Inode-map file: only the current size is stored. */
1463 struct inodemap_md *m = &li->md.inodemap;
1464 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
1465 s->size = cpu_to_le32(m->size);
1469 case TypeSegmentMap:
1471 struct su_md *m = &li->md.segmentusage;
1472 struct su_metadata *s = &lai->metadata[0].segmentusage;
1473 s->table_size = cpu_to_le32(m->table_size);
/* Quota file: grace period settings only. */
1479 struct quota_md *m = &li->md.quota;
1480 struct quota_metadata *s = &lai->metadata[0].quota;
1481 s->gracetime = cpu_to_le32(m->gracetime);
1482 s->graceunits = cpu_to_le32(m->graceunits);
1485 case TypeOrphanList:
1486 case TypeAccessTime:
1489 default: /* TypeBase or larger */
1491 struct file_md *i = &li->md.file;
1492 struct file_metadata *l = &lai->metadata[0].file;
1493 struct dir_metadata *d = &lai->metadata[0].dir;
1494 struct special_metadata *s = &lai->metadata[0].special;
1496 if (li->type < TypeBase)
/* Regular-file attributes common to all TypeBase+ inodes. */
1498 l->flags = cpu_to_le16(i->flags);
1499 l->mode = cpu_to_le16(ino->i_mode);
1500 l->userid = cpu_to_le32(ino->i_uid);
1501 l->groupid = cpu_to_le32(ino->i_gid);
1502 l->treeid = cpu_to_le32(i->treeid);
1503 l->creationtime = cpu_to_le64(i->creationtime);
1504 l->modifytime = cpu_to_le64(encode_time(&ino->i_mtime));
1505 l->ctime = cpu_to_le64(encode_time(&ino->i_ctime));
1506 l->accesstime = cpu_to_le64(encode_time(&i->i_accesstime));
1507 l->size = cpu_to_le64(ino->i_size);
1508 l->parent = cpu_to_le32(i->parent);
1509 l->linkcount = cpu_to_le32(ino->i_nlink);
/* Directory: hash seed; device special files: major/minor. */
1515 d->hash_seed = cpu_to_le32(i->seed);
1520 s->major = cpu_to_le32(MAJOR(ino->i_rdev));
1521 s->minor = cpu_to_le32(MINOR(ino->i_rdev));
1526 unmap_dblock(db, lai);
1529 /*-----------------------------------------------------------------------
1530 * Inode allocation map handling.
1531 * Inode 1 of each fileset is a bitmap of free inode numbers.
1532 * Whenever the file is extended in size, new bits are set to one. They
1533 * are then cleared when the inode is allocated. When a block becomes
1534 * full of zeros, we don't need to store it any more.
1536 * We don't clear the bit until we are committed to creating an inode
1537 * This means we cannot clear it straight away, so two different threads
1538 * might see the same inode number as being available. We have two
1539 * approaches to guard against this.
1540 * Firstly we have a 'current' pointer into the inodemap file and
1541 * increase that past the inode we return. This discourages multiple
1542 * hits but as the pointer would need to be rewound occasionally it
1543 * isn't a guarantee. The guarantee against multiple allocations is done
1544 * via a flag in the block representing an inode. This is set
1545 * while an inode is being allocated.
1548 /* inode number allocation has the prealloc/pin/commit/abort structure
1549 * so it can be committed effectively
1553 choose_free_inum(struct fs *fs, struct inode *fsys, u32 *inump,
1554 struct datablock **bp, int *restarted)
/* Pick a probably-free inode number from the inodemap file
 * (inode 1 of the fileset): a bitmap where a set bit means "free".
 * On success *inump holds the chosen number and *bp the bitmap
 * block (referenced).  *restarted guards against looping forever
 * when a rescan from the start finds nothing.
 */
1556 struct inode *im = lafs_iget(fsys, 1, SYNC);
1558 struct datablock *b;
/* Drop any bitmap block reference left over from a previous attempt. */
1564 struct inode *i = (*bp)->b.inode;
1565 putdref(*bp, MKREF(cfi_map));
1570 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* 'thisblock'/'nextbit' form a cursor into the bitmap file. */
1572 bnum = LAFSI(im)->md.inodemap.thisblock;
1574 if (bnum == NoBlock ||
1575 LAFSI(im)->md.inodemap.nextbit >= (fs->blocksize<<3)) {
1576 if (bnum == NoBlock)
1577 bnum = LAFSI(im)->md.inodemap.size;
1579 if (bnum+1 < LAFSI(im)->md.inodemap.size)
1581 else if (!*restarted) {
1585 /* Need to add a new block to the file */
1586 bnum = LAFSI(im)->md.inodemap.size;
1587 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL,
1592 lafs_iolock_written(&b->b);
1593 set_bit(B_PinPending, &b->b.flags);
1594 lafs_iounlock_block(&b->b);
1596 lafs_checkpoint_lock(fs);
1597 err = lafs_pin_dblock(b, NewSpace);
/* -EAGAIN: cannot pin in this phase; wait for the checkpoint
 * to pass and retry from the top. */
1598 if (err == -EAGAIN) {
1599 lafs_checkpoint_unlock_wait(fs);
1605 buf = map_dblock(b);
1606 /* Set block to "all are free" */
1607 memset(buf, 0xff, fs->blocksize);
1608 unmap_dblock(b, buf);
1609 set_bit(B_Valid, &b->b.flags);
1610 LAFSI(im)->md.inodemap.size = bnum+1;
1611 lafs_dirty_inode(im);
1612 lafs_dirty_dblock(b);
1613 lafs_checkpoint_unlock(fs);
1614 putdref(b, MKREF(cfi_map));
/* Advance to the next bitmap block that is actually stored. */
1617 err = lafs_find_next(im, &bnum);
1623 LAFSI(im)->md.inodemap.nextbit = 0;
1624 LAFSI(im)->md.inodemap.thisblock = bnum;
1627 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(cfi_map));
1631 err = lafs_find_block(b, NOADOPT);
/* A hole with no cached data means no free inodes recorded here:
 * push the cursor past this block and retry. */
1634 if (b->b.physaddr == 0 && !test_bit(B_Valid, &b->b.flags)) {
1635 LAFSI(im)->md.inodemap.nextbit =
1636 (fs->blocksize<<3) + 1;
1637 putdref(b,MKREF(cfi_map));
1640 err = lafs_read_block(b);
1644 bit = LAFSI(im)->md.inodemap.nextbit;
1645 LAFSI(im)->md.inodemap.thisblock = bnum;
1646 buf = map_dblock(b);
1647 while (bnum == 0 && bit < 16) {
1648 /* Never return an inum below 16 - they are special */
1649 if (!generic_test_le_bit(bit, (unsigned long *)buf))
1650 generic___clear_le_bit(bit, (unsigned long *)buf);
/* Find the first set (free) bit at or after the cursor. */
1654 bit = generic_find_next_le_bit((unsigned long *)buf,
1655 fs->blocksize<<3, bit);
1656 unmap_dblock(b, buf);
1657 LAFSI(im)->md.inodemap.nextbit = bit+1;
1658 if (bit >= fs->blocksize<<3) {
1659 putdref(b,MKREF(cfi_map));
1662 mutex_unlock(&im->i_mutex);
/* inum = bit within block + block number scaled by bits-per-block. */
1664 *inump = bit + (bnum << (im->i_blkbits + 3));
1668 lafs_checkpoint_unlock(fs);
1670 putdref(b, MKREF(cfi_map));
1672 mutex_unlock(&im->i_mutex);
1677 struct inode_map_new_info {
/* ib: data block of the inode being created;
 * mb: inodemap bitmap block covering its inode number. */
1678 struct datablock *ib, *mb;
1682 inode_map_new_prepare(struct fs *fs, int inum, struct inode *fsys,
1683 inode_map_new_info *imni)
/* Prepare-phase of inode-number allocation: choose a candidate
 * number (or use the caller-supplied 'inum' for special inodes),
 * claim its inode block, and record both blocks in *imni for the
 * later pin/commit/abort steps.
 */
1688 struct datablock *b;
1690 imni->ib = imni->mb = NULL;
1693 /* choose a possibly-free inode number */
1694 err = choose_free_inum(fs, fsys, &choice,
1695 &imni->mb, &restarted);
1699 b = lafs_get_block(fsys, choice, NULL, GFP_KERNEL,
/* B_Claimed is the real mutual-exclusion guard: if another thread
 * already claimed this inode number, drop the ref and try again. */
1704 if (test_and_set_bit(B_Claimed, &b->b.flags)) {
1705 putdref(b, MKREF(cfi_ino));
1711 lafs_iolock_written(&imni->mb->b);
1712 set_bit(B_PinPending, &imni->mb->b.flags);
1713 lafs_iounlock_block(&imni->mb->b);
1715 set_bit(B_PinPending, &b->b.flags);
1722 inode_map_new_pin(struct inode_map_new_info *imni)
/* Pin-phase: pin the bitmap block, then the inode block, into the
 * current checkpoint phase as NewSpace; first failure wins. */
1726 err = lafs_pin_dblock(imni->mb, NewSpace);
1727 err = err ?: lafs_pin_dblock(imni->ib, NewSpace);
1732 inode_map_new_commit(struct inode_map_new_info *imni)
/* Commit-phase: clear the "free" bit for the chosen inode number in
 * the inodemap bitmap, then release the bitmap block.  If the block
 * becomes all-zero (no free inodes recorded), the block is erased so
 * it need not be stored.
 */
1737 int blksize = imni->ib->b.inode->i_sb->s_blocksize;
/* Bit index of this inode number within its bitmap block. */
1738 int bit = imni->ib->b.fileaddr & (blksize*8 - 1);
1740 struct inode *ino = imni->mb->b.inode;
1742 mutex_lock_nested(&ino->i_mutex, I_MUTEX_QUOTA);
1743 buf = map_dblock(imni->mb);
1744 generic___clear_le_bit(bit, buf);
1745 if (buf[blksize/sizeof(*buf)-1] == 0 &&
1746 generic_find_next_le_bit(buf, blksize*8, 0) == blksize*8)
1747 /* block is empty, punch a hole */
1750 unmap_dblock(imni->mb, buf);
1752 lafs_erase_dblock(imni->mb);
1754 lafs_dirty_dblock(imni->mb);
1756 putdref(imni->mb, MKREF(cfi_map));
1757 mutex_unlock(&ino->i_mutex);
1760 putdref(imni->ib, MKREF(cfi_ino));
1764 inode_map_new_abort(struct inode_map_new_info *imni)
/* Abort-phase: undo the prepare step - release the claim on the
 * inode block, drop its orphan registration, and put both block
 * references.  Safe against partially-completed prepare (NULL
 * pointers are handled by the elided guards).
 */
1767 clear_bit(B_Claimed, &imni->ib->b.flags);
1768 clear_bit(B_PinPending, &imni->ib->b.flags);
1769 lafs_orphan_release(fs_from_inode(imni->ib->b.inode),
1772 putdref(imni->ib, MKREF(cfi_ino));
1774 struct inode *ino = imni->mb->b.inode;
1775 putdref(imni->mb, MKREF(cfi_map));
1781 lafs_new_inode(struct fs *fs, struct inode *fsys, struct inode *dir,
1782 int type, int inum, int mode, struct datablock **inodbp)
1784 /* allocate and instantiate a new inode. If inum is non-zero,
1785 * choose any number, otherwise we are creating a special inode
1786 * and have to use the given number.
1787 * This creation is committed independently of any name that might
1788 * subsequently be given to the inode. So we register it as an
1789 * orphan so that it will be cleaned up if the name isn't
1790 * successfully created
1794 struct datablock *b;
1795 struct inode_map_new_info imni;
1796 struct update_handle ui;
/* Prepare: reserve an inode number and space for the on-disk
 * inode record in the next cluster. */
1799 err = inode_map_new_prepare(fs, inum, fsys, &imni);
1800 err = lafs_cluster_update_prepare(&ui, fs, sizeof(struct la_inode))
/* Register as orphan before commit so a crash or a failed name
 * creation leads to cleanup rather than a leaked inode. */
1803 err = lafs_make_orphan(fs, imni.ib);
1807 lafs_checkpoint_lock(fs);
1809 err = inode_map_new_pin(&imni);
/* -EAGAIN: wrong checkpoint phase - wait it out and retry. */
1811 if (err == -EAGAIN) {
1812 lafs_checkpoint_unlock_wait(fs);
1818 b = getdref(imni.ib, MKREF(inode_new));
1820 lafs_iolock_block(&b->b); /* make sure we don't race with the cleaner
1821 * and zero this inode while trying to load it
1823 lafs_inode_init(b, type, mode, dir);
1824 lafs_iounlock_block(&b->b);
/* Commit the bitmap update, then instantiate the VFS inode. */
1826 inode_map_new_commit(&imni);
1827 ino = lafs_iget(fsys, b->b.fileaddr, SYNC);
1829 lafs_cluster_update_abort(&ui);
1832 lafs_cluster_update_commit(&ui, b, 0,
1833 LAFSI(ino)->metadata_size);
/* Invariant: the datablock and the in-core inode must point at
 * each other once instantiation succeeds. */
1834 LAFS_BUG(LAFSI(ino)->dblock != b, &b->b);
1835 LAFS_BUG(b->my_inode != ino, &b->b);
1836 lafs_checkpoint_unlock(fs);
1841 putdref(b, MKREF(inode_new));
/* Error path: unwind pin/prepare state before returning. */
1845 lafs_checkpoint_unlock(fs);
1848 inode_map_new_abort(&imni);
1849 lafs_cluster_update_abort(&ui);
1850 dprintk("After abort %d: %s\n", err, strblk(&imni.ib->b));
1851 return ERR_PTR(err);
1854 static int inode_map_free(struct fs *fs, struct inode *fsys, u32 inum)
/* Return inode number 'inum' to the free pool: set its bit in the
 * inodemap bitmap (set == free).  Serialised by the inodemap
 * inode's i_mutex.
 */
1856 struct inode *im = lafs_iget(fsys, 1, SYNC);
1859 struct datablock *b;
1863 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Split inum into bitmap block number and bit within that block. */
1865 bnum = inum >> (3 + fs->blocksize_bits);
1866 bit = inum - (bnum << (3 + fs->blocksize_bits));
1867 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1869 mutex_unlock(&im->i_mutex);
1873 err = lafs_read_block(b);
1875 putdref(b, MKREF(inode_map_free));
1876 mutex_unlock(&im->i_mutex);
1880 lafs_iolock_written(&b->b);
1881 set_bit(B_PinPending, &b->b.flags);
1882 lafs_iounlock_block(&b->b);
1884 lafs_checkpoint_lock(fs);
1885 err = lafs_pin_dblock(b, ReleaseSpace);
/* -EAGAIN: wrong checkpoint phase - wait and retry. */
1886 if (err == -EAGAIN) {
1887 lafs_checkpoint_unlock_wait(fs);
1891 buf = map_dblock(b);
1892 generic___set_le_bit(bit, buf);
1893 unmap_dblock(b, buf);
1894 lafs_dirty_dblock(b);
1895 putdref(b, MKREF(inode_map_free));
1896 lafs_checkpoint_unlock(fs);
1897 mutex_unlock(&im->i_mutex);
1902 int lafs_inode_inuse(struct fs *fs, struct inode *fsys, u32 inum)
1904 /* This is used during roll-forward to register a newly created
1905 * inode in the inode map
1907 struct inode *im = lafs_iget(fsys, 1, SYNC);
1910 struct datablock *b;
1914 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Split inum into bitmap block number and bit within that block. */
1916 bnum = inum >> (3 + fs->blocksize_bits);
1917 bit = inum - (bnum << (3 + fs->blocksize_bits));
1918 if (bnum > LAFSI(im)->md.inodemap.size) {
1919 /* inum too unbelievably big */
1920 mutex_unlock(&im->i_mutex);
1924 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1926 mutex_unlock(&im->i_mutex);
1931 err = lafs_read_block(b);
1933 putdref(b, MKREF(inode_map_free));
1934 mutex_unlock(&im->i_mutex);
1939 lafs_iolock_written(&b->b);
1940 set_bit(B_PinPending, &b->b.flags);
1941 lafs_iounlock_block(&b->b);
1943 lafs_checkpoint_lock(fs);
1944 err = lafs_pin_dblock(b, CleanSpace);
/* -EAGAIN: wrong checkpoint phase - wait and retry. */
1945 if (err == -EAGAIN) {
1946 lafs_checkpoint_unlock_wait(fs);
1950 buf = map_dblock(b);
1951 if (bnum == LAFSI(im)->md.inodemap.size) {
1952 /* need to add a new block to the file */
1953 memset(buf, 0xff, fs->blocksize)
1954 LAFSI(im)->md.inodemap.size = bnum + 1;
1955 lafs_dirty_inode(im);
/* Clear the bit: this inode number is now recorded as in use. */
1957 generic___clear_le_bit(bit, buf);
1958 unmap_dblock(b, buf);
1959 lafs_dirty_dblock(b);
1960 putdref(b, MKREF(inode_map_free));
1961 lafs_checkpoint_unlock(fs);
1962 mutex_unlock(&im->i_mutex);
1969 int lafs_setattr(struct dentry *dentry, struct iattr *attr)
/* VFS ->setattr: validate the attribute change, pin the inode's
 * data block into the current checkpoint phase, then apply the
 * change (including any size change) and mark everything dirty.
 */
1972 struct inode *ino = dentry->d_inode;
1973 struct fs *fs = fs_from_inode(ino);
1974 struct datablock *db;
1976 err = inode_change_ok(ino, attr);
1977 db = lafs_inode_dblock(ino, SYNC, MKREF(setattr));
1983 /* We don't need iolock_written here as we don't
1984 * actually change the inode block yet
1986 lafs_iolock_block(&db->b);
1987 set_bit(B_PinPending, &db->b.flags);
1988 lafs_iounlock_block(&db->b);
1990 /* FIXME quota stuff */
1993 lafs_checkpoint_lock(fs);
1994 err = lafs_pin_dblock(db, ReleaseSpace);
/* -EAGAIN: wrong checkpoint phase - wait and retry. */
1995 if (err == -EAGAIN) {
1996 lafs_checkpoint_unlock_wait(fs);
/* Apply the size change first (truncates the page cache), then
 * copy the remaining attributes into the in-core inode. */
2001 if ((attr->ia_valid & ATTR_SIZE) &&
2002 attr->ia_size != i_size_read(ino))
2003 truncate_setsize(ino, attr->ia_size);
2004 setattr_copy(ino, attr);
2005 mark_inode_dirty(ino);
2007 lafs_dirty_dblock(db);
2009 clear_bit(B_PinPending, &db->b.flags);
2010 putdref(db, MKREF(setattr));
2011 lafs_checkpoint_unlock(fs);
2016 void lafs_truncate(struct inode *ino)
2018 /* Want to truncate this file.
2019 * i_size has already been changed, and the address space
2020 * has been cleaned up.
2021 * So just start the background truncate
2023 struct fs *fs = fs_from_inode(ino);
2024 struct datablock *db = lafs_inode_dblock(ino, SYNC, MKREF(trunc));
/* First block index that lies entirely beyond the new i_size. */
2031 trunc_block = ((i_size_read(ino) + fs->blocksize - 1)
2032 >> fs->blocksize_bits);
2033 /* We hold i_mutex, so regular orphan processing cannot
2034 * continue - we have to push it forward ourselves.
/* Drive any truncate already in progress until it has passed the
 * new truncation point, waiting for async completion as needed. */
2036 while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
2037 LAFSI(ino)->trunc_next < trunc_block) {
2038 prepare_to_wait(&fs->async_complete, &wq,
2039 TASK_UNINTERRUPTIBLE);
2040 lafs_inode_handle_orphan(db);
2041 if (test_bit(B_Orphan, &db->b.flags))
2044 finish_wait(&fs->async_complete, &wq);
2046 /* There is nothing we can do about errors here. The
2047 * most likely are ENOMEM which itself is very unlikely.
2048 * If this doesn't get registered as an orphan .... maybe
2049 * it will have to wait until something else truncates it.
2051 lafs_make_orphan(fs, db);
2053 if (!test_and_set_bit(I_Trunc, &LAFSI(ino)->iflags))
/* Truncate-to-zero bumps the generation so stale blocks from the
 * old file body can be recognised and discarded. */
2055 if (trunc_block == 0)
2056 LAFSI(ino)->trunc_gen++;
2057 LAFSI(ino)->trunc_next = trunc_block;
2058 putdref(db, MKREF(trunc));
2061 const struct inode_operations lafs_special_ino_operations = {
2062 .setattr = lafs_setattr,
2063 .getattr = lafs_getattr,
2064 .truncate = lafs_truncate,