4 * Copyright (C) 2005-2009
5 * Neil Brown <neilb@suse.de>
6 * Released under the GPL, version 2
8 * generic inode handling
13 #include <linux/random.h>
14 #include <linux/delay.h>
15 #include <linux/slab.h>
17 static void check_atime_ref(struct inode *ino, int async);
19 /* Supporting an async 'iget' - as required by the cleaner -
20 * is slightly non-trivial.
21 * iget*_locked will normally wait for any inode with one
22 * of the flags I_FREEING I_CLEAR I_WILL_FREE I_NEW
23 * to either be unhashed or have the flag cleared.
24 * We cannot afford that wait in the cleaner as we could deadlock.
25 * So we use iget5_locked and provide a test function that fails
26 * if it finds the inode with any of those flags set.
27 * If it does see the inode like that it clears the inum
28 * that is passed in (by reference) so that it knows to continue
29 * failing (for consistency) and so that the 'set' function
30 * we provide can know to fail the 'set'.
31 * The result of this is that if iget finds an inode it would
32 * have to wait on, the inum is cleared and NULL is returned.
33 * An unfortunate side effect is that an inode will be allocated
34 * and then destroyed to no avail.
35 * This is avoided by calling ilookup5 first. This also allows
36 * us to only allocate/load the data block if there really seems
39 #define NO_INO (~(ino_t)0)
/* iget5_locked()/ilookup5() 'test' callback for async lookups.
 * Matches on the inode number passed by reference in 'data', but
 * refuses to match an inode that is I_FREEING/I_CLEAR/I_WILL_FREE/I_NEW,
 * clearing the referenced inum (to NO_INO) so that subsequent calls fail
 * consistently and async_iset knows to fail too - see block comment above.
 */
40 static int async_itest(struct inode *inode, void *data)
46 /* found and is freeing */
48 if (inode->i_ino != inum)
50 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
/* iget5_locked() 'set' callback paired with async_itest: initialises the
 * new inode's i_ino from the inum passed by reference.  Per the block
 * comment above, it fails when async_itest has cleared that inum.
 */
57 static int async_iset(struct inode *inode, void *data)
62 inode->i_ino = *inump;
/* lafs_iget: find, and load if necessary, inode 'inum' from superblock 'sb'.
 * For async (cleaner) lookups the backing data block is loaded first via
 * lafs_read_block_async, because we cannot afford -EAGAIN once an I_NEW
 * inode exists; async lookups also refuse to block on a freeing inode
 * (see comment at top of file).  Returns the inode or an ERR_PTR.
 */
67 lafs_iget(struct super_block *sb, ino_t inum, int async)
69 /* find, and load if needed, this inum */
70 struct inode *ino = NULL;
72 struct datablock *b = NULL;
73 struct inode *inodefile;
77 BUG_ON(inum == NO_INO);
83 /* We cannot afford to block on 'freeing_inode'
84 * So use iget5_locked and refuse to match such
86 * If the inode is 'freeing', inum gets set to NO_INO.
87 * ilookup5 is used first to avoid an unnecessary
88 * alloc/free if the inode is locked in some way.
93 ino = ilookup5(sb, inum, async_itest, &inum2);
100 /* For async we will always want the dblock loaded,
101 * and we need to load it first as we cannot afford
102 * to fail -EAGAIN once we have an I_NEW inode.
105 b = lafs_get_block(inodefile, inum, NULL,
106 GFP_NOFS, MKREF(iget));
108 return ERR_PTR(-ENOMEM);
111 err = lafs_read_block_async(b);
114 /* Have the block, so safe to iget */
116 ino = iget5_locked(sb, inum,
117 async_itest, async_iset,
127 if (test_and_set_bit(B_Async, &b->b.flags)) {
128 putdref(b, MKREF(iget));
131 getdref(b, MKREF(async));
135 ino = iget_locked(sb, inum);
138 putdref(b, MKREF(iget));
139 return ERR_PTR(-ENOMEM);
/* Found an existing (not I_NEW) inode: drop our block ref and return it */
142 if (!(ino->i_state & I_NEW)) {
143 putdref(b, MKREF(iget));
145 check_atime_ref(ino, async);
149 return ERR_PTR(-ENOENT);
152 LAFSI(ino)->filesys = igrab(inodefile);
154 /* surprisingly the inode bdi does not default to the
155 * super_blocks bdi...
157 ino->i_data.backing_dev_info = sb->s_bdi;
158 /* Need to load block 'inum' from an inode file...
161 b = lafs_get_block(inodefile, inum, NULL, GFP_KERNEL, MKREF(iget));
165 err = lafs_read_block(b);
170 oldino = rcu_my_inode(b);
172 /* The inode is new, but the block thinks it has an
173 * old inode, so we must be in the process of destroying
175 * So fail the lookup without even looking at the content
176 * of the block (Which might not be clear yet).
178 spin_lock(&oldino->i_data.private_lock);
179 if (!test_bit(I_Deleting, &LAFSI(oldino)->iflags)) {
181 LAFSI(oldino)->dblock = NULL;
182 LAFSI(oldino)->iblock = NULL;
184 spin_unlock(&oldino->i_data.private_lock);
192 err = lafs_import_inode(ino, b);
195 printk("lafs_import_inode failed %d\n", err);
198 check_atime_ref(ino, async);
199 unlock_new_inode(ino);
/* If the block was flagged for async handling, hand it to the cleaner thread */
201 if (b && test_and_clear_bit(B_Async, &b->b.flags)) {
202 putdref(b, MKREF(async));
203 lafs_wake_thread(fs_from_sb(sb));
205 putdref(b, MKREF(iget));
209 unlock_new_inode(ino);
/* lafs_iget_fs: look up inode 'inum' within subordinate filesystem 'fsnum'.
 * When fsnum names a subset filesystem, first iget the TypeInodeFile inode
 * for it, obtain/activate its subset superblock, then iget the inode there;
 * otherwise look up directly in the main superblock.  Returns inode or
 * ERR_PTR.
 */
216 lafs_iget_fs(struct fs *fs, int fsnum, int inum, int async)
218 struct super_block *sb;
224 /* Need to locate or load the superblock for this
225 * subordinate filesystem
227 struct inode *filesys;
228 struct super_block *sb2;
230 filesys = lafs_iget(sb, fsnum, async);
/* The fsnum inode must itself be an inode-file to host a subset fs */
233 if (LAFSI(filesys)->type != TypeInodeFile) {
235 return ERR_PTR(-ENOENT);
237 /* FIXME can get_subset_sb be async at all?? */
238 sb2 = lafs_get_subset_sb(filesys);
241 return ERR_PTR(PTR_ERR(sb2));
243 rv = lafs_iget(sb2, inum, async);
245 deactivate_locked_super(sb2);
247 up_write(&sb2->s_umount);
249 rv = lafs_iget(sb, inum, async);
251 atomic_inc(&sb->s_active);
/* lafs_import_inode: initialise in-core inode 'ino' from the on-disk
 * 'struct la_inode' held in datablock 'b'.  Decodes the common fields
 * (block counts, generation, flags, type, metadata size, depth), then the
 * type-specific metadata union: filesystem, inode map, segment usage,
 * quota, orphan list, or regular file/dir/symlink/special.  Finally links
 * b->my_inode to ino (rcu_assign_pointer).
 */
257 lafs_import_inode(struct inode *ino, struct datablock *b)
259 struct la_inode *lai = map_dblock(b);
260 struct lafs_inode *li = LAFSI(ino);
263 if (lai->filetype == 0) {
270 ino->i_mode = S_IFREG;
271 ino->i_nlink = 1; /* For special file, set nlink so they
272 * never appear unlinked */
276 LAFS_BUG(ino->i_ino != b->b.fileaddr, &b->b);
277 li->cblocks = le32_to_cpu(lai->data_blocks);
278 li->pblocks = li->ablocks = 0;
/* i_blocks is counted in 512-byte sectors, hence the "- 9" shift */
279 li->vfs_inode.i_blocks = ((blkcnt_t)li->cblocks
280 << (ino->i_sb->s_blocksize_bits - 9));
281 li->ciblocks = le32_to_cpu(lai->index_blocks);
285 ino->i_generation = le16_to_cpu(lai->generation);
286 li->trunc_gen = lai->trunc_gen;
287 li->flags = lai->flags;
288 li->type = lai->filetype;
289 li->metadata_size = le16_to_cpu(lai->metadata_size);
290 li->depth = lai->depth;
292 dprintk("inode %lu type is %d\n", (unsigned long)ino->i_ino, li->type);
294 ino->i_data.a_ops = &lafs_file_aops;
/* Type-specific decode: filesystem (subset) inode */
300 struct fs_md *i = &li->md.fs;
301 struct fs_metadata *l = &lai->metadata[0].fs;
304 i->usagetable = le16_to_cpu(l->snapshot_usage_table);
305 decode_time(&ino->i_mtime, le64_to_cpu(l->update_time));
306 i->cblocks_used = le64_to_cpu(l->blocks_used);
307 i->pblocks_used = i->ablocks_used = 0;
308 i->blocks_allowed = le64_to_cpu(l->blocks_allowed);
309 i->blocks_unalloc = 0;
310 i->creation_age = le64_to_cpu(l->creation_age);
311 i->inodes_used = le32_to_cpu(l->inodes_used);
312 i->quota_inums[0] = le32_to_cpu(l->quota_inodes[0]);
313 i->quota_inums[1] = le32_to_cpu(l->quota_inodes[1]);
314 i->quota_inums[2] = le32_to_cpu(l->quota_inodes[2]);
315 i->quota_inodes[0] = i->quota_inodes[1]
316 = i->quota_inodes[2] = NULL;
317 nlen = li->metadata_size - offsetof(struct la_inode,
318 metadata[0].fs.name);
319 i->accesstime = NULL;
325 /* Need to unmap the dblock to kmalloc because
326 * the mapping makes us 'atomic'
328 unmap_dblock(b, lai);
329 i->name = kmalloc(nlen+1, GFP_KERNEL);
331 l = &lai->metadata[0].fs;
336 memcpy(i->name, l->name, nlen);
339 /* Make this look like a directory */
340 ino->i_mode = S_IFDIR;
344 ino->i_op = &lafs_subset_ino_operations;
345 ino->i_fop = &lafs_subset_file_operations;
/* Type-specific decode: inode map */
351 struct inodemap_md *m = &li->md.inodemap;
352 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
353 m->size = le32_to_cpu(s->size);
354 m->thisblock = NoBlock;
/* Type-specific decode: segment usage */
361 struct su_md *m = &li->md.segmentusage;
362 struct su_metadata *s = &lai->metadata[0].segmentusage;
363 m->table_size = le32_to_cpu(s->table_size);
/* Type-specific decode: quota */
369 struct quota_md *m = &li->md.quota;
370 struct quota_metadata *s = &lai->metadata[0].quota;
371 m->gracetime = le32_to_cpu(s->gracetime);
372 m->graceunits = le32_to_cpu(s->graceunits);
/* Type-specific decode: orphan list */
377 struct orphan_md *m = &li->md.orphan;
378 /* This will be set via lafs_count_orphans */
386 default: /* TypeBase or larger */
388 struct file_md *i = &li->md.file;
389 struct file_metadata *l = &lai->metadata[0].file;
390 struct dir_metadata *d = &lai->metadata[0].dir;
391 struct special_metadata *s = &lai->metadata[0].special;
393 if (li->type < TypeBase)
395 i->flags = le16_to_cpu(l->flags);
396 ino->i_mode = le16_to_cpu(l->mode);
397 ino->i_uid = le32_to_cpu(l->userid);
398 ino->i_gid = le32_to_cpu(l->groupid);
399 i->treeid = le32_to_cpu(l->treeid);
400 i->creationtime = le64_to_cpu(l->creationtime);
401 decode_time(&ino->i_mtime, le64_to_cpu(l->modifytime));
402 decode_time(&ino->i_ctime, le64_to_cpu(l->ctime));
403 decode_time(&i->i_accesstime, le64_to_cpu(l->accesstime));
404 ino->i_atime = i->i_accesstime;
405 i->atime_offset = 0; /* Will be filled-in later probably */
406 lafs_add_atime_offset(&ino->i_atime, i->atime_offset);
407 ino->i_size = le64_to_cpu(l->size);
408 i->parent = le32_to_cpu(l->parent);
409 ino->i_nlink = le32_to_cpu(l->linkcount);
410 if (ino->i_nlink == 0 && list_empty(&b->orphans) &&
411 fs_from_inode(ino)->rolled) {
412 /* This block should already be on the orphan
413 * list, otherwise there is a filesystem
415 * Either the orphan file is wrong, or the
416 * linkcount is wrong.
417 * It is safest to assume the latter - either
418 * way an FS check would be needed to fix it.
419 * Note: while roll-forward is happening, this
420 * situation is perfectly possible and is handled
423 /* FIXME set a superblock flag requesting
424 * directory linkage checking
429 dprintk(" mode = 0%o uid %d size %lld\n",
430 ino->i_mode, ino->i_uid, ino->i_size);
433 ino->i_op = &lafs_file_ino_operations;
434 ino->i_fop = &lafs_file_file_operations;
435 ino->i_mode = (ino->i_mode & 07777) | S_IFREG;
438 i->seed = le32_to_cpu(d->hash_seed);
439 ino->i_op = &lafs_dir_ino_operations;
440 ino->i_fop = &lafs_dir_file_operations;
441 ino->i_mode = (ino->i_mode & 07777) | S_IFDIR;
444 dprintk("Hmm. %d %d %d\n",
451 ino->i_op = &lafs_link_ino_operations;
452 ino->i_mode = (ino->i_mode & 07777) | S_IFLNK;
455 /* the data had better be in the inode ... */
456 ino->i_rdev = MKDEV(le32_to_cpu(s->major),
457 le32_to_cpu(s->minor));
458 ino->i_op = &lafs_special_ino_operations;
459 init_special_inode(ino, ino->i_mode, ino->i_rdev);
466 ino->i_blkbits = ino->i_sb->s_blocksize_bits;
467 /* FIXME i_blocks and i_byte - used for quota?? */
470 /* Note: no refcount yet. Either will remove the reference to the
474 rcu_assign_pointer(b->my_inode, ino);
478 printk("inode %lu type is %d\n",
479 (unsigned long)ino->i_ino, li->type);
480 unmap_dblock(b, lai);
/* check_atime_ref: ensure this inode holds a reference to (and has loaded
 * its offset from) the relevant block of the filesystem's accesstime file.
 * No-op for async lookups, metadata inode types, or when I_AccessTime is
 * already set, or when no accesstime file exists.
 */
484 static void check_atime_ref(struct inode *ino, int async)
486 /* If there is an atime file in this filesystem the inode
487 * should hold a reference to the relevant block in
490 struct inode *root, *at;
494 /* Never bother for async lookups */
496 if (LAFSI(ino)->type < TypeBase)
498 if (test_bit(I_AccessTime, &LAFSI(ino)->iflags))
500 root = LAFSI(ino)->filesys;
501 at = LAFSI(root)->md.fs.accesstime;
505 if (LAFSI(ino)->md.file.atime_offset)
506 LAFSI(ino)->md.file.atime_offset = 0;
508 /* "* 2" to get byte number, then shift to get block
/* Each entry is 2 bytes, so block = inum*2 >> blkbits == inum >> (blkbits-1) */
511 bnum = ino->i_ino >> (at->i_blkbits-1);
512 b = lafs_get_block(at, bnum, NULL, GFP_NOFS, MKREF(atime));
514 if (lafs_read_block(b) == 0) {
518 i = (ino->i_ino * 2) & ((1<<at->i_blkbits)-1);
519 LAFSI(ino)->md.file.atime_offset = le16_to_cpu(atp[i]);
520 set_bit(I_AccessTime, &LAFSI(ino)->iflags);
521 unmap_dblock(b, atp);
522 lafs_add_atime_offset(&ino->i_atime,
523 LAFSI(ino)->md.file.atime_offset);
525 putdref(b, MKREF(atime));
/* lafs_add_atime_offset: apply a packed 16-bit atime delta to *atime.
 * Low 5 bits are an exponent, high 11 bits a mantissa (an implied top
 * bit 0x800 is added for larger exponents).  Large exponents add whole
 * seconds; smaller ones add milliseconds via timespec_add_ns.
 */
529 void lafs_add_atime_offset(struct timespec *atime, int offset)
536 expon = offset & 0x1f;
538 mantissa = (offset >> 5) | 0x800;
540 mantissa = (offset >> 5);
543 mantissa <<= expon-11;
544 atime->tv_sec += mantissa;
548 mantissa <<= expon-1;
549 timespec_add_ns(atime, (s64)mantissa * 1000000);
/* normalise: shift *mantissa right until it fits in 12 bits; the return
 * value is used by callers (see update_atime_delta) as the shift count
 * for the packed exponent.  Only called for values >= 2048, so the top
 * retained bit is always 1 and only 11 bits need storing.
 */
553 static int normalise(int *mantissa)
555 /* Shift down until value can be stored in 12 bits:
556 * Top bit will be '1', so only 11 bits needed.
557 * Not used on values below 2048.
560 while (*mantissa >= 4096) {
/* update_atime_delta: recompute the packed atime offset from the gap
 * between i_atime and the stored i_accesstime.  Deltas >= 2048s are
 * stored in seconds (shift biased by 11), smaller ones in milliseconds
 * (shift biased by 1); negative deltas become zero.  Updates
 * md.file.atime_offset and, per callers, indicates whether it changed.
 */
567 static int update_atime_delta(struct inode *ino)
569 /* calculate new delta to show the difference between
570 * i_atime and i_accesstime
573 if (LAFSI(ino)->type < TypeBase)
575 if (timespec_compare(&ino->i_atime,
576 &LAFSI(ino)->md.file.i_accesstime) <= 0) {
577 /* We cannot store negative delta so if i_atime is in the
578 * past, just store zero
582 struct timespec diff;
585 diff = timespec_sub(ino->i_atime,
586 LAFSI(ino)->md.file.i_accesstime);
587 if (diff.tv_sec >= 2048) {
588 /* Just store the seconds */
590 shift = normalise(&rv) + 11;
592 /* Store the milliseconds */
593 int rv = diff.tv_nsec / 1000000;
594 rv += diff.tv_sec * 1000;
596 shift = normalise(&rv) + 1;
608 if (LAFSI(ino)->md.file.atime_offset == rv)
611 LAFSI(ino)->md.file.atime_offset = rv;
/* store_atime_delta: write the inode's current atime_offset into its slot
 * in the filesystem's accesstime file, dirtying the block only if the
 * stored value actually differs.  Requires I_AccessTime (we hold a
 * reference on the accesstime block, so the lookup must succeed).
 */
615 static void store_atime_delta(struct inode *ino)
623 if (!test_bit(I_AccessTime, &LAFSI(ino)->iflags))
624 /* sorry, nothing we can do here */
627 /* We own a reference, so this lookup must succeed */
628 at = LAFSI(LAFSI(ino)->filesys)->md.fs.accesstime;
629 bnum = ino->i_ino >> (at->i_blkbits-1);
630 b = lafs_get_block(at, bnum, NULL, GFP_NOFS, MKREF(store_atime));
633 i = (ino->i_ino * 2) & ((1<<at->i_blkbits)-1);
634 if (le16_to_cpu(atp[i]) != LAFSI(ino)->md.file.atime_offset) {
635 atp[i] = cpu_to_le16(LAFSI(ino)->md.file.atime_offset);
636 lafs_dirty_dblock(b);
638 unmap_dblock(b, atp);
639 putdref(b, MKREF(store_atime));
/* lafs_inode_checkpin: reconcile the I_Pinned flag with current state.
 * I_Pinned should be set exactly when i_nlink != 0 and ->iblock is
 * B_Pinned; while set we own a reference to the inode.  Must be called
 * after any change to i_nlink or after pinning/unpinning an InoIdx block.
 */
642 void lafs_inode_checkpin(struct inode *ino)
644 /* Make sure I_Pinned is set correctly.
645 * It should be set precisely if i_nlink is non-zero,
646 * and ->iblock is B_Pinned.
647 * When it is set, we own a reference to the inode.
649 * This needs to be called whenever we change
650 * i_nlink, and whenever we pin or unpin an InoIdx
653 if (ino->i_nlink == 0) {
654 /* I_Pinned should not be set */
655 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags)) {
656 if (ino->i_sb->s_type == &lafs_fs_type)
662 /* Need to check if iblock is Pinned. */
663 struct indexblock *ib = NULL;
664 if (LAFSI(ino)->iblock) {
665 spin_lock(&ino->i_data.private_lock);
666 ib = LAFSI(ino)->iblock;
667 if (ib && !test_bit(B_Pinned, &ib->b.flags))
669 spin_unlock(&ino->i_data.private_lock);
672 if (!test_and_set_bit(I_Pinned, &LAFSI(ino)->iflags)) {
673 if (ino->i_sb->s_type == &lafs_fs_type)
679 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags)) {
680 if (ino->i_sb->s_type == &lafs_fs_type)
/* lafs_inode_get_dblock: return the cached dblock for 'ino' (or NULL),
 * taking a reference under the owning inode's private_lock.  When the
 * block's inode differs from 'ino', the other inode's lock is taken with
 * nesting level 1 to avoid lockdep complaints.
 */
689 struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG)
691 struct datablock *db;
693 spin_lock(&ino->i_data.private_lock);
694 db = LAFSI(ino)->dblock;
696 if (db->b.inode == ino)
697 getdref_locked(db, REF);
699 spin_lock_nested(&db->b.inode->i_data.private_lock, 1);
700 getdref_locked(db, REF);
701 spin_unlock(&db->b.inode->i_data.private_lock);
704 spin_unlock(&ino->i_data.private_lock);
/* lafs_inode_dblock: get (or create and link) the datablock backing this
 * inode in its inode file, then read it - asynchronously when 'async' is
 * set, synchronously otherwise.  Returns the block or an ERR_PTR.
 */
708 struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
710 struct datablock *db;
713 db = lafs_inode_get_dblock(ino, REF);
716 db = lafs_get_block(LAFSI(ino)->filesys, ino->i_ino, NULL,
719 return ERR_PTR(-ENOMEM);
721 LAFSI(ino)->dblock = db;
722 rcu_assign_pointer(db->my_inode, ino);
724 err = lafs_read_block_async(db);
726 err = lafs_read_block(db);
/* lafs_inode_init: initialise the on-disk image of a brand-new inode in
 * freshly allocated datablock 'b', so that a later iget ->
 * lafs_import_inode sees correct initial values.  Fills the common
 * header, then the type-specific metadata, zeroes the remainder of the
 * block and plants the initial index-extent marker, then dirties the
 * (already pinned) block.
 */
734 void lafs_inode_init(struct datablock *b, int type, int mode, struct inode *dir)
736 /* A new block has been allocated in an inode file to hold an
737 * inode. We get to fill in initial values so that when
738 * 'iget' calls lafs_import_inode, the correct inode is
741 struct fs *fs = fs_from_inode(b->b.inode);
742 struct la_inode *lai = map_dblock(b);
745 lai->data_blocks = cpu_to_le32(0);
746 lai->index_blocks = cpu_to_le32(0);
747 get_random_bytes(&lai->generation, sizeof(lai->generation));
750 lai->filetype = type;
/* Type-specific initial metadata: subset filesystem */
756 struct fs_metadata *l = &lai->metadata[0].fs;
757 size = sizeof(struct fs_metadata);
760 l->blocks_allowed = 0;
761 l->creation_age = fs->wc[0].cluster_seq;
763 l->quota_inodes[0] = 0;
764 l->quota_inodes[1] = 0;
765 l->quota_inodes[2] = 0;
766 l->snapshot_usage_table = 0;
768 /* name will be zero length and not used */
773 struct inodemap_metadata *l = &lai->metadata[0].inodemap;
775 size = sizeof(struct inodemap_metadata);
779 size = sizeof(struct su_metadata);
782 size = sizeof(struct quota_metadata);
/* Regular files, directories and specials share file_metadata */
792 struct file_metadata *l = &lai->metadata[0].file;
793 struct timespec now = CURRENT_TIME;
795 l->flags = cpu_to_le16(0);
796 l->userid = cpu_to_le32(current->cred->fsuid);
797 if (dir && (dir->i_mode & S_ISGID)) {
798 l->groupid = cpu_to_le32(dir->i_gid);
802 l->groupid = cpu_to_le32(current->cred->fsgid);
803 if (dir && LAFSI(dir)->md.file.treeid)
804 l->treeid = cpu_to_le32(LAFSI(dir)->md.file.treeid);
806 l->treeid = l->userid;
808 l->mode = cpu_to_le16(mode);
809 l->creationtime = encode_time(&now);
810 l->modifytime = l->creationtime;
811 l->ctime = l->creationtime;
812 l->accesstime = l->creationtime;
814 l->parent = dir ? cpu_to_le32(dir->i_ino) : 0;
817 if (type == TypeDir) {
818 struct dir_metadata *l = &lai->metadata[0].dir;
820 get_random_bytes(&seed,
/* Force seed odd with low bits (&~7)|1 - presumably required by the dir hash */
822 seed = (seed & ~7) | 1;
823 l->hash_seed = cpu_to_le32(seed);
824 size = sizeof(struct dir_metadata);
825 } else if (type == TypeSpecial) {
826 struct special_metadata *s = &lai->metadata[0].special;
827 s->major = s->minor = 0;
828 size = sizeof(struct special_metadata);
830 size = sizeof(struct file_metadata);
833 size += sizeof(struct la_inode);
834 lai->metadata_size = cpu_to_le32(size);
835 memset(((char *)lai)+size, 0, fs->blocksize-size);
836 *(u16 *)(((char *)lai)+size) = cpu_to_le16(IBLK_EXTENT);
838 unmap_dblock(b, lai);
839 set_bit(B_Valid, &b->b.flags);
840 LAFS_BUG(!test_bit(B_Pinned, &b->b.flags), &b->b);
841 lafs_dirty_dblock(b);
/* lafs_clear_inode: VFS clear_inode hook.  Break the inode <-> dblock
 * linkage (unless the file is being deleted, when the dblock must stay
 * attached), asserting that no index-block references remain.
 */
844 void lafs_clear_inode(struct inode *ino)
846 struct lafs_inode *li = LAFSI(ino);
847 dprintk("CLEAR INODE %d\n", (int)ino->i_ino);
851 /* Now is a good time to break the linkage between
852 * inode and dblock - but not if the file is
855 if (!test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
856 struct datablock *db;
857 spin_lock(&ino->i_data.private_lock);
858 db = LAFSI(ino)->dblock;
860 struct indexblock *ib = LAFSI(ino)->iblock;
861 LAFS_BUG(ib && atomic_read(&ib->b.refcnt), &db->b);
863 LAFSI(ino)->dblock = NULL;
864 LAFSI(ino)->iblock = NULL;
866 spin_unlock(&ino->i_data.private_lock);
869 /* FIXME release quota inodes if filesystem */
872 static int inode_map_free(struct fs *fs, struct super_block *sb, u32 inum);
/* lafs_delete_inode: VFS delete_inode hook.  Truncate the file to zero,
 * mark the backing inode block I_Deleting/I_Trunc, claim it and put it on
 * the orphan list (the orphan machinery finishes the erase), then free
 * the inode number in the inode map.
 */
874 void lafs_delete_inode(struct inode *ino)
876 struct fs *fs = fs_from_inode(ino);
879 if (ino->i_mode == 0) {
880 /* There never was an inode here,
886 dprintk("DELETE INODE %d\n", (int)ino->i_ino);
888 /* Normal truncation holds an igrab, so we cannot be
889 * deleted until any truncation finishes
891 BUG_ON(test_bit(I_Trunc, &LAFSI(ino)->iflags));
893 b = lafs_inode_dblock(ino, SYNC, MKREF(delete_inode));
895 i_size_write(ino, 0);
896 truncate_inode_pages(&ino->i_data, 0);
897 LAFSI(ino)->trunc_next = 0;
898 set_bit(I_Deleting, &LAFSI(ino)->iflags);
899 set_bit(I_Trunc, &LAFSI(ino)->iflags);
903 set_bit(B_Claimed, &b->b.flags);
904 lafs_add_orphan(fs, b);
905 dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr);
906 putdref(b, MKREF(delete_inode));
908 inode_map_free(fs, ino->i_sb, ino->i_ino);
/* prune: lafs_walk_leaf_index callback used when a whole leaf index block
 * is being discarded - account every referenced physical block as freed
 * via lafs_summary_update; the index itself is cleared afterwards.
 */
913 static int prune(void *data, u32 addr, u64 paddr, int len)
915 /* This whole index block is being pruned, just account
916 * for everything and it will be cleared afterwards
918 struct indexblock *ib = data;
919 struct inode *ino = ib->b.inode;
920 struct fs *fs = fs_from_inode(ino);
921 int ph = !!test_bit(B_Phase1, &ib->b.flags);
923 dprintk("PRUNE %d for %d at %lld\n", addr, len, (long long)paddr);
924 if (paddr == 0 || len == 0)
926 for (i = 0 ; i < len ; i++)
927 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
/* prune_some: lafs_walk_leaf_index callback used when only part of a leaf
 * index block is being pruned.  Addresses at or beyond trunc_next are
 * copied into the block's uninc_table (under private_lock) for later
 * incorporation, and their physical blocks accounted as freed.
 */
931 static int prune_some(void *data, u32 addr, u64 paddr, int len)
933 /* Part of this index block is being pruned. Copy
934 * what addresses we can into uninc_table so that
935 * it can be 'incorporated'
936 * We should probably share some code with
937 * lafs_allocated_block??
939 struct indexblock *ib = data;
940 struct inode *ino = ib->b.inode;
941 struct fs *fs = fs_from_inode(ino);
942 int ph = !!test_bit(B_Phase1, &ib->b.flags);
945 if (paddr == 0 || len == 0)
947 dprintk("PRUNE2 %d for %d at %lld\n", addr, len, (long long)paddr);
948 for (i = 0 ; i < len ; i++) {
949 /* FIXME should allow longer truncation ranges in uninc_table
950 * as they are easy to handle.
/* Addresses below trunc_next are not being truncated - skip them */
953 if (addr + i < LAFSI(ino)->trunc_next)
955 spin_lock(&ino->i_data.private_lock);
956 a = &ib->uninc_table.pending_addr
957 [ib->uninc_table.pending_cnt - 1];
958 if (ib->uninc_table.pending_cnt <
959 ARRAY_SIZE(ib->uninc_table.pending_addr)) {
961 a->fileaddr = addr + i;
964 LAFS_BUG(!test_bit(B_Pinned, &ib->b.flags), &ib->b);
965 ib->uninc_table.pending_cnt++;
967 spin_unlock(&ino->i_data.private_lock);
970 spin_unlock(&ino->i_data.private_lock);
971 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
/* lafs_inode_handle_orphan: advance orphan processing for the inode whose
 * backing datablock is 'b'.  Handles two jobs: finishing deletion (erase
 * the inode block once all block counts hit zero) and incremental
 * truncation (find the next leaf index block at/after trunc_next and
 * discard the addresses it holds, via prune/prune_some + incorporation).
 * May need to be re-run, e.g. -EBUSY while a checkpoint is in progress.
 */
976 int lafs_inode_handle_orphan(struct datablock *b)
978 /* Don't need rcu protection for my_inode run_orphan
981 struct indexblock *ib, *ib2;
982 struct inode *ino = b->my_inode;
983 struct fs *fs = fs_from_inode(ino);
984 u32 trunc_next, next_trunc;
/* No truncate pending: either finish a delete or release the orphan */
988 if (!test_bit(I_Trunc, &LAFSI(ino)->iflags)) {
989 if (test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
990 LAFS_BUG(ino->i_nlink, &b->b);
991 if (LAFSI(ino)->cblocks +
992 LAFSI(ino)->pblocks +
993 LAFSI(ino)->ablocks +
994 LAFSI(ino)->ciblocks +
995 LAFSI(ino)->piblocks)
996 printk("Deleting inode %lu: %ld+%ld+%ld %ld+%ld\n",
1000 LAFSI(ino)->ablocks,
1001 LAFSI(ino)->ciblocks,
1002 LAFSI(ino)->piblocks);
1003 BUG_ON(LAFSI(ino)->cblocks +
1004 LAFSI(ino)->pblocks +
1005 LAFSI(ino)->ablocks +
1006 LAFSI(ino)->ciblocks +
1007 LAFSI(ino)->piblocks);
1008 if (lafs_erase_dblock_async(b))
1009 lafs_orphan_release(fs, b);
1010 } else if (ino->i_nlink || LAFSI(ino)->type == 0)
1011 lafs_orphan_release(fs, b);
1013 lafs_orphan_forget(fs, b);
1017 ib = lafs_make_iblock(ino, ADOPT, SYNC, MKREF(inode_handle_orphan));
1021 /* Here is the guts of 'truncate'. We find the next leaf index
1022 * block and discard all the addresses there-in.
1024 trunc_next = LAFSI(ino)->trunc_next;
1026 if (trunc_next == 0xFFFFFFFF) {
1027 /* truncate has finished in that all data blocks
1028 * have been removed and all index block are either
1029 * gone or pending incorporation at which point they will
1031 * If we hit a phase change, we will need to postpone
1032 * the rest of the cleaning until it completes.
1033 * If there is a checkpoint happening, then all the work
1034 * that we can do now, it will do for us. So just
1037 struct indexblock *tmp;
1038 struct indexblock *next;
1041 if (!test_bit(B_Pinned, &ib->b.flags)) {
1042 /* must be finished */
1043 LAFS_BUG(test_bit(B_Dirty, &ib->b.flags), &ib->b);
1044 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1046 wake_up(&fs->trunc_wait);
1050 if (fs->checkpointing) {
1051 /* This cannot happen with current code,
1052 * but leave it in case we ever have
1053 * orphan handling parallel with checkpoints
1055 err = -EBUSY; /* Try again after the checkpoint */
1059 lastaddr = (i_size_read(ino) +
1061 >> fs->blocksize_bits;
1062 /* Find a Pinned descendent of ib which has no
1063 * Pinned descendents and no PrimaryRef dependent
1064 * (so take the last).
1065 * Prefer blocks that are beyond EOF (again, take the last).
1066 * If there are none, descend the last block that
1067 * is not after EOF and look at its children.
1070 spin_lock(&ib->b.inode->i_data.private_lock);
1074 list_for_each_entry(tmp, &ib2->children, b.siblings) {
1075 if (!test_bit(B_Index, &tmp->b.flags) ||
1076 !test_bit(B_Pinned, &tmp->b.flags))
1079 tmp->b.fileaddr > next->b.fileaddr)
1083 if (ib2->b.fileaddr < lastaddr) {
1084 /* Must be all done */
1085 spin_unlock(&ib->b.inode->i_data.private_lock);
1086 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1088 wake_up(&fs->trunc_wait);
1092 getiref(ib2, MKREF(inode_handle_orphan2));
1093 spin_unlock(&ib->b.inode->i_data.private_lock);
1095 /* ib2 is an index block beyond EOF with no
1097 * Incorporating it should unpin it.
1099 if (!list_empty(&ib2->children)) {
1100 lafs_print_tree(&ib2->b, 3);
1101 LAFS_BUG(1, &ib2->b);
1104 if (!lafs_iolock_written_async(&ib2->b)) {
1105 putiref(ib2, MKREF(inode_handle_orphan2));
1109 while (ib2->uninc_table.pending_cnt || ib2->uninc)
1110 lafs_incorporate(fs, ib2);
1112 if (test_bit(B_Dirty, &ib2->b.flags) ||
1113 test_bit(B_Realloc, &ib2->b.flags))
1114 lafs_cluster_allocate(&ib2->b, 0);
1116 lafs_iounlock_block(&ib2->b);
1118 if (!list_empty(&ib2->b.siblings)) {
1119 printk("looping on %s\n", strblk(&ib2->b));
1124 putiref(ib2, MKREF(inode_handle_orphan2));
1127 if (lafs_iolock_written_async(&ib->b)) {
1129 lafs_incorporate(fs, ib);
1130 lafs_iounlock_block(&ib->b);
1135 putiref(ib, MKREF(inode_handle_orphan));
1139 putiref(ib, MKREF(inode_handle_orphan));
/* Locate the next leaf index block covering trunc_next */
1141 ib = lafs_leaf_find(ino, trunc_next, ADOPT, &next_trunc,
1142 ASYNC, MKREF(inode_handle_orphan3));
1145 /* now hold an iolock on ib */
1147 /* Ok, trunc_next seems to refer to a block that exists.
1148 * We need to erase it..
1150 * So we open up the index block ourselves, call
1151 * lafs_summary_update with each block address, and then
1155 if (LAFSI(ino)->depth == 0) {
1156 /* Nothing to truncate */
1157 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
1159 if (test_bit(B_Pinned, &ib->b.flags))
1160 /* Need to move the dirtiness which keeps this
1161 * pinned to the data block.
1163 lafs_cluster_allocate(&ib->b, 0);
1165 lafs_iounlock_block(&ib->b);
1170 lafs_checkpoint_lock(fs);
1171 err = lafs_reserve_block(&ib->b, ReleaseSpace);
1175 if (!test_bit(B_Valid, &ib->b.flags) &&
1176 test_bit(B_InoIdx, &ib->b.flags)) {
1177 /* still invalid, just re-erase to remove
1179 LAFSI(ino)->trunc_next = next_trunc;
1180 lafs_cluster_allocate(&ib->b, 0);
1185 lafs_pin_block(&ib->b);
1187 /* It might be that this can happen, in which case
1188 * we simply update trunc_next and loop. But I'd like
1189 * to be sure before I implement that
1191 if (!test_bit(B_Valid, &ib->b.flags)) {
1192 printk("Not Valid: %s\n", strblk(&ib->b));
1193 printk("depth = %d\n", LAFSI(ino)->depth);
1194 if (test_bit(B_InoIdx, &ib->b.flags))
1195 printk("DB: %s\n", strblk(&LAFSI(ib->b.inode)->dblock->b));
1196 LAFSI(ino)->trunc_next = next_trunc;
1197 //BUG_ON(!test_bit(B_Valid, &ib->b.flags));
1202 if (ib->b.fileaddr < trunc_next &&
1203 lafs_leaf_next(ib, 0) < trunc_next) {
1204 /* We only want to truncate part of this index block.
1205 * So we copy addresses into uninc_table and then
1206 * call lafs_incorporate.
1207 * This might cause the index tree to grow, so we
1208 * cannot trust next_trunc
1210 if (ib->uninc_table.pending_cnt == 0 &&
1211 ib->uninc == NULL) {
1212 lafs_dirty_iblock(ib, 0);
1213 /* FIXME this just removes 8 blocks at a time,
1214 * which is not enough
1216 lafs_walk_leaf_index(ib, prune_some, ib);
1218 if (test_bit(B_Dirty, &ib->b.flags))
1219 lafs_incorporate(fs, ib);
1223 LAFSI(ino)->trunc_next = next_trunc;
1225 while (ib->uninc_table.pending_cnt || ib->uninc) {
1226 /* There should be no Realloc data blocks here
1227 * but index blocks might be realloc still.
1229 LAFS_BUG(!test_bit(B_Dirty, &ib->b.flags) &&
1230 !test_bit(B_Realloc, &ib->b.flags), &ib->b);
1231 lafs_incorporate(fs, ib);
1233 if (test_bit(B_InoIdx, &ib->b.flags) ||
1234 !test_bit(B_PhysValid, &ib->b.flags) ||
1235 ib->b.physaddr != 0) {
1236 lafs_walk_leaf_index(ib, prune, ib);
1237 lafs_clear_index(ib);
1238 lafs_dirty_iblock(ib, 0);
1240 if (test_bit(B_Dirty, &ib->b.flags))
1241 lafs_incorporate(fs, ib);
1242 if (!list_empty(&ib->children))
1243 lafs_print_tree(&ib->b, 2);
1244 LAFS_BUG(!list_empty(&ib->children), &ib->b);
1247 lafs_iounlock_block(&ib->b);
1249 lafs_checkpoint_unlock(fs);
1251 putiref(ib, MKREF(inode_handle_orphan3));
/* lafs_dirty_inode: VFS dirty_inode hook.  If the inode's dblock is
 * pinned, mark the inode I_Dirty (cluster_allocate will copy it into the
 * dblock later); otherwise this is an atime-only update and just the
 * delta in the accesstime file is stored.  Also propagates a new mtime
 * to the containing filesystem inode.
 */
1255 void lafs_dirty_inode(struct inode *ino)
1257 /* this is called in one of three cases:
1258 * 1/ by lafs internally when dblock or iblock is pinned and
1259 * ready to be dirtied
1260 * 2/ by writeout before requesting a write - to update mtime
1261 * 3/ by read to update atime
1263 * We want to handle atime updates carefully as they may not change
1264 * the stored inode itself.
1265 * For all other updates, the inode dblock exists and is pinned.
1266 * In those cases we will be updating the inode and so can store
1267 * the atime exactly.
1268 * For an atime update, the dblock may not exists, or may not be
1269 * Pinned. If it isn't then we don't want to make the inode dirty
1270 * but only want to update the delta stored in the atime file.
1271 * The block for that should already be pinned.
1274 * We mustn't update the data block as it could be in
1275 * writeout and we cannot always wait safely.
1276 * So require that anyone who really cares, dirties the datablock
1277 * or a child themselves.
1278 * When cluster_allocate eventually gets called, it will update
1279 * the datablock from the inode.
1280 * If an update has to wait for the next phase, lock_dblock
1281 * (e.g. in setattr) will do that.
1283 * We also use this opportunity to update the filesystem modify time.
1285 struct timespec now;
1286 struct inode *filesys;
1289 if (LAFSI(ino)->dblock) {
1290 struct datablock *db;
1291 spin_lock(&ino->i_data.private_lock);
1292 db = LAFSI(ino)->dblock;
1293 if (db && test_bit(B_Pinned, &db->b.flags))
1295 spin_unlock(&ino->i_data.private_lock);
1299 if (update_atime_delta(ino))
1300 store_atime_delta(ino);
1304 set_bit(I_Dirty, &LAFSI(ino)->iflags);
1305 ino->i_sb->s_dirt = 1;
1307 if (LAFSI(ino)->type < TypeBase)
/* Inode will be stored exactly, so the packed atime delta resets to 0 */
1309 LAFSI(ino)->md.file.i_accesstime = ino->i_atime;
1310 if (LAFSI(ino)->md.file.atime_offset) {
1311 LAFSI(ino)->md.file.atime_offset = 0;
1312 store_atime_delta(ino);
1315 now = current_fs_time(ino->i_sb);
1316 filesys = LAFSI(ino)->filesys;
1317 if (!timespec_equal(&filesys->i_mtime, &now)) {
1318 filesys->i_mtime = now;
1319 set_bit(I_Dirty, &LAFSI(filesys)->iflags);
/* lafs_sync_inode: fsync support - push any pending inode update into the
 * next write cluster via the cluster-update machinery.  update_cluster
 * tracks progress: >1 means wait for that cluster, ==1 means wait for a
 * checkpoint/phase change, 0 means nothing committed yet.  If no update
 * record can be created, falls back to forcing a checkpoint.
 */
1323 int lafs_sync_inode(struct inode *ino, int wait)
1325 /* fsync has been called on this file so we need
1326 * to sync any inode updates to the next cluster.
1328 * If we cannot create an update record,
1329 * we wait for a phase change, which writes everything
1332 struct datablock *b;
1333 struct fs *fs = fs_from_inode(ino);
1334 struct update_handle uh;
1338 if (LAFSI(ino)->update_cluster > 1)
1339 lafs_cluster_wait(fs, LAFSI(ino)->update_cluster);
1340 if (LAFSI(ino)->update_cluster == 1) {
1341 lafs_checkpoint_lock(fs);
1342 lafs_checkpoint_unlock_wait(fs);
1347 LAFSI(ino)->update_cluster = 0;
1348 if (!test_bit(I_Dirty, &LAFSI(ino)->iflags))
1350 b = lafs_inode_dblock(ino, SYNC, MKREF(write_inode));
/* Serialise against in-flight writes, then copy the inode into its dblock */
1354 lafs_iolock_written(&b->b);
1355 lafs_inode_fillblock(ino);
1356 lafs_iounlock_block(&b->b);
1358 err = lafs_cluster_update_prepare(&uh, fs, LAFS_INODE_LOG_SIZE);
1360 lafs_cluster_update_abort(&uh);
1362 lafs_checkpoint_lock(fs);
1363 if (lafs_cluster_update_pin(&uh) == 0) {
1364 if (test_and_clear_bit(B_Dirty, &b->b.flags))
1365 lafs_space_return(fs, 1);
1366 LAFSI(ino)->update_cluster =
1367 lafs_cluster_update_commit
1368 (&uh, b, LAFS_INODE_LOG_START,
1369 LAFS_INODE_LOG_SIZE);
1371 lafs_cluster_update_abort(&uh);
1372 lafs_checkpoint_unlock(fs);
1374 if (test_bit(B_Dirty, &b->b.flags)) {
1375 /* FIXME need to write out the data block...
1376 * Is that just lafs_cluster_allocate ?
1380 if (LAFSI(ino)->update_cluster == 0) {
1381 lafs_checkpoint_lock(fs);
1382 if (test_bit(B_Dirty, &b->b.flags))
1383 LAFSI(ino)->update_cluster = 1;
1384 lafs_checkpoint_start(fs);
1385 lafs_checkpoint_unlock(fs);
1387 putdref(b, MKREF(write_inode));
1388 return 0; /* FIXME should I return some error message??? */
1391 void lafs_inode_fillblock(struct inode *ino)
1393 /* copy data from ino into the related data block */
/* Serialise the in-memory lafs_inode into the on-disk, little-endian
 * la_inode image held in the inode's cached dblock.  The caller seen
 * elsewhere in this file runs this under lafs_iolock_written() on the
 * block -- presumably that locking is required; confirm before adding
 * new callers.  I_Dirty is cleared up-front, apparently so a
 * concurrent re-dirty is not lost -- TODO confirm.
 */
1395 struct lafs_inode *li = LAFSI(ino);
1396 struct datablock *db = li->dblock;
1397 struct la_inode *lai;
1399 clear_bit(I_Dirty, &LAFSI(ino)->iflags);
/* Generic (type-independent) header fields of the on-disk inode. */
1401 lai = map_dblock(db);
1402 lai->data_blocks = cpu_to_le32(li->cblocks);
1403 lai->index_blocks = cpu_to_le32(li->ciblocks);
1404 lai->generation = cpu_to_le16(ino->i_generation);
1405 lai->trunc_gen = li->trunc_gen;
1406 lai->flags = li->flags;
1407 lai->filetype = li->type;
1408 if (lai->metadata_size != cpu_to_le16(li->metadata_size)) {
1409 /* Changing metadata size is weird.
1410 * We will need to handle this somehow for xattrs
1411 * For now we just want to cope with
1412 * Dir -> InodeFile changes, and that guarantees us
1413 * there is no index info - so just clear the index
1416 u16 *s = (u16*)(((char*)lai) + li->metadata_size);
1417 BUG_ON(li->type != TypeInodeFile);
1418 lai->metadata_size = cpu_to_le16(li->metadata_size);
1419 memset(s, 0, ino->i_sb->s_blocksize - li->metadata_size);
1420 *s = cpu_to_le16(IBLK_INDIRECT);
1422 lai->depth = li->depth;
/* Per-type metadata: only the union member matching li->type is
 * written.  First: the fileset ("fs") inode. */
1427 struct fs_md *i = &li->md.fs;
1428 struct fs_metadata *l = &lai->metadata[0].fs;
1431 l->snapshot_usage_table = cpu_to_le16(i->usagetable);
1432 l->update_time = cpu_to_le64(encode_time(&ino->i_mtime));
1433 l->blocks_used = cpu_to_le64(i->cblocks_used);
1434 l->blocks_allowed = cpu_to_le64(i->blocks_allowed);
1435 l->creation_age = cpu_to_le64(i->creation_age);
1436 l->inodes_used = cpu_to_le32(i->inodes_used);
1437 l->quota_inodes[0] = cpu_to_le32(i->quota_inums[0]);
1438 l->quota_inodes[1] = cpu_to_le32(i->quota_inums[1]);
1439 l->quota_inodes[2] = cpu_to_le32(i->quota_inums[2]);
/* The fileset name occupies the remainder of the metadata area:
 * zero-fill, then copy at most the space available. */
1440 nlen = lai->metadata_size - offsetof(struct la_inode,
1441 metadata[0].fs.name);
1442 memset(l->name, 0, nlen);
1443 if (i->name == NULL)
1445 else if (strlen(i->name) < nlen)
1446 nlen = strlen(i->name);
1447 memcpy(l->name, i->name, nlen);
/* Inode-map inode: just its size. */
1453 struct inodemap_md *m = &li->md.inodemap;
1454 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
1455 s->size = cpu_to_le32(m->size);
1459 case TypeSegmentMap:
1461 struct su_md *m = &li->md.segmentusage;
1462 struct su_metadata *s = &lai->metadata[0].segmentusage;
1463 s->table_size = cpu_to_le32(m->table_size);
/* Quota inode: grace settings only. */
1469 struct quota_md *m = &li->md.quota;
1470 struct quota_metadata *s = &lai->metadata[0].quota;
1471 s->gracetime = cpu_to_le32(m->gracetime);
1472 s->graceunits = cpu_to_le32(m->graceunits);
1475 case TypeOrphanList:
1476 case TypeAccessTime:
1479 default: /* TypeBase or larger */
/* Regular file / directory / special file share file_metadata;
 * dir and special add a few extra fields below. */
1481 struct file_md *i = &li->md.file;
1482 struct file_metadata *l = &lai->metadata[0].file;
1483 struct dir_metadata *d = &lai->metadata[0].dir;
1484 struct special_metadata *s = &lai->metadata[0].special;
1486 if (li->type < TypeBase)
1488 l->flags = cpu_to_le16(i->flags);
1489 l->mode = cpu_to_le16(ino->i_mode);
1490 l->userid = cpu_to_le32(ino->i_uid);
1491 l->groupid = cpu_to_le32(ino->i_gid);
1492 l->treeid = cpu_to_le32(i->treeid);
1493 l->creationtime = cpu_to_le64(i->creationtime);
1494 l->modifytime = cpu_to_le64(encode_time(&ino->i_mtime));
1495 l->ctime = cpu_to_le64(encode_time(&ino->i_ctime));
1496 l->accesstime = cpu_to_le64(encode_time(&i->i_accesstime));
1497 l->size = cpu_to_le64(ino->i_size);
1498 l->parent = cpu_to_le32(i->parent);
1499 l->linkcount = cpu_to_le32(ino->i_nlink);
1505 d->hash_seed = cpu_to_le32(i->seed);
1510 s->major = cpu_to_le32(MAJOR(ino->i_rdev));
1511 s->minor = cpu_to_le32(MINOR(ino->i_rdev));
1516 unmap_dblock(db, lai);
1519 /*-----------------------------------------------------------------------
1520 * Inode allocate map handling.
1521 * Inode 1 of each fileset is a bitmap of free inode numbers.
1522 * Whenever the file is extended in size, new bits are set to one. They
1523 * are then cleared when the inode is allocated. When a block becomes
1524 * full of zeros, we don't need to store it any more.
1526 * We don't clear the bit until we are committed to creating an inode.
1527 * This means we cannot clear it straight away, so two different threads
1528 * might see the same inode number as being available. We have two
1529 * approaches to guard against this.
1530 * Firstly we have a 'current' pointer into the inodemap file and
1531 * increase that past the inode we return. This discourages multiple
1532 * hits but as the pointer would need to be rewound occasionally it
1533 * isn't a guarantee. The guarantee against multiple allocations is done
1534 * via a flag in the block representing an inode. This is set
1535 * while an inode is being allocated.
1538 /* inode number allocation has the prealloc/pin/commit/abort structure
1539 * so it can be committed effectively
/* Choose an inode number that appears to be free in the inode map
 * (inode 1 of the fileset; set bit == free).  On success *inump holds
 * the chosen number and *bp a referenced ("cfi_map") map block
 * covering it; the caller clears the bit only once it commits to the
 * allocation.  *restarted limits the scan to a single wrap-around.
 * Serialised by im->i_mutex (I_MUTEX_QUOTA nesting level).
 */
1543 choose_free_inum(struct fs *fs, struct super_block *sb, u32 *inump,
1544 struct datablock **bp, int *restarted)
1546 struct inode *im = lafs_iget(sb, 1, SYNC);
1548 struct datablock *b;
/* Drop any map-block reference the caller passed back in via *bp. */
1554 struct inode *i = (*bp)->b.inode;
1555 putdref(*bp, MKREF(cfi_map));
1560 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Resume from the cached "current" position (thisblock/nextbit);
 * move to another block when this one is exhausted. */
1562 bnum = LAFSI(im)->md.inodemap.thisblock;
1564 if (bnum == NoBlock ||
1565 LAFSI(im)->md.inodemap.nextbit >= (fs->blocksize<<3)) {
1566 if (bnum == NoBlock)
1567 bnum = LAFSI(im)->md.inodemap.size;
1569 if (bnum+1 < LAFSI(im)->md.inodemap.size)
1571 else if (!*restarted) {
1575 /* Need to add a new block to the file */
1576 bnum = LAFSI(im)->md.inodemap.size;
1577 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL,
1582 lafs_iolock_written(&b->b);
1583 set_bit(B_PinPending, &b->b.flags);
1584 lafs_iounlock_block(&b->b);
1586 lafs_checkpoint_lock(fs);
1587 err = lafs_pin_dblock(b, NewSpace);
1588 if (err == -EAGAIN) {
1589 lafs_checkpoint_unlock_wait(fs);
1595 buf = map_dblock(b);
1596 /* Set block to "all are free" */
1597 memset(buf, 0xff, fs->blocksize);
1598 unmap_dblock(b, buf);
1599 set_bit(B_Valid, &b->b.flags);
1600 LAFSI(im)->md.inodemap.size = bnum+1;
1601 lafs_dirty_inode(im);
1602 lafs_dirty_dblock(b);
1603 lafs_checkpoint_unlock(fs);
1604 putdref(b, MKREF(cfi_map));
1607 err = lafs_find_next(im, &bnum);
1613 LAFSI(im)->md.inodemap.nextbit = 0;
1614 LAFSI(im)->md.inodemap.thisblock = bnum;
1617 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(cfi_map));
1621 err = lafs_find_block(b, NOADOPT);
/* A hole (no physical address, not valid in memory) means the whole
 * block has been punched out: no free inums here, skip past it. */
1624 if (b->b.physaddr == 0 && !test_bit(B_Valid, &b->b.flags)) {
1625 LAFSI(im)->md.inodemap.nextbit =
1626 (fs->blocksize<<3) + 1;
1627 putdref(b,MKREF(cfi_map));
1630 err = lafs_read_block(b);
1634 bit = LAFSI(im)->md.inodemap.nextbit;
1635 LAFSI(im)->md.inodemap.thisblock = bnum;
1636 buf = map_dblock(b);
1637 while (bnum == 0 && bit < 16) {
1638 /* Never return an inum below 16 - they are special */
1639 if (!generic_test_le_bit(bit, (unsigned long *)buf))
1640 generic___clear_le_bit(bit, (unsigned long *)buf);
1644 bit = generic_find_next_le_bit((unsigned long *)buf,
1645 fs->blocksize<<3, bit);
1646 unmap_dblock(b, buf);
1647 LAFSI(im)->md.inodemap.nextbit = bit+1;
1648 if (bit >= fs->blocksize<<3) {
1649 putdref(b,MKREF(cfi_map));
1652 mutex_unlock(&im->i_mutex);
/* inum = bit within block + block number scaled by bits-per-block. */
1654 *inump = bit + (bnum << (im->i_blkbits + 3));
1658 lafs_checkpoint_unlock(fs);
1660 putdref(b, MKREF(cfi_map));
1662 mutex_unlock(&im->i_mutex);
/* State carried across the prepare/pin/commit/abort phases of
 * allocating a new inode number:
 *   ib - datablock that will hold the new inode itself
 *   mb - inode-map (bitmap) block containing the chosen bit
 */
1667 struct inode_map_new_info {
1668 struct datablock *ib, *mb;
/* Phase 1 of allocating an inode number: pick a candidate via
 * choose_free_inum(), take the corresponding inode block and guard
 * against a concurrent allocator with the B_Claimed bit (the
 * per-inode-block flag described in the comment block above).
 * Both blocks get B_PinPending set ready for pinning.
 * On success imni holds referenced ib/mb blocks for the later phases.
 * NOTE(review): the inum parameter is presumably the caller-forced
 * number for special inodes -- its handling is not visible here.
 */
1672 inode_map_new_prepare(struct fs *fs, int inum, struct super_block *sb,
1673 struct inode_map_new_info *imni)
1678 struct datablock *b;
1680 imni->ib = imni->mb = NULL;
1683 /* choose a possibly-free inode number */
1684 err = choose_free_inum(fs, sb, &choice,
1685 &imni->mb, &restarted);
1689 b = lafs_get_block(ino_from_sb(sb), choice, NULL, GFP_KERNEL,
/* Someone else is already allocating this inum - retry/bail. */
1694 if (test_and_set_bit(B_Claimed, &b->b.flags)) {
1695 putdref(b, MKREF(cfi_ino));
1701 lafs_iolock_written(&imni->mb->b);
1702 set_bit(B_PinPending, &imni->mb->b.flags);
1703 lafs_iounlock_block(&imni->mb->b);
1705 set_bit(B_PinPending, &b->b.flags);
/* Phase 2: pin both the map block and the new inode's block into the
 * current checkpoint phase (NewSpace).  Returns the first failure;
 * the caller treats -EAGAIN as "wait for checkpoint and retry".
 */
1712 inode_map_new_pin(struct inode_map_new_info *imni)
1716 err = lafs_pin_dblock(imni->mb, NewSpace);
1717 err = err ?: lafs_pin_dblock(imni->ib, NewSpace);
/* Phase 3: commit the allocation.  Clear the inum's "free" bit in the
 * map block (under the map inode's mutex); if that leaves the block
 * with no free inums at all, erase the block entirely (punch a hole)
 * instead of writing an all-zero block -- per the design notes above,
 * all-zero map blocks are not stored.  Drops the mb and ib references
 * taken at prepare time.
 */
1722 inode_map_new_commit(struct inode_map_new_info *imni)
1727 int blksize = imni->ib->b.inode->i_sb->s_blocksize;
/* Bit index of this inum within its map block. */
1728 int bit = imni->ib->b.fileaddr & (blksize*8 - 1);
1730 struct inode *ino = imni->mb->b.inode;
1732 mutex_lock_nested(&ino->i_mutex, I_MUTEX_QUOTA);
1733 buf = map_dblock(imni->mb);
1734 generic___clear_le_bit(bit, buf);
1735 if (buf[blksize/sizeof(*buf)-1] == 0 &&
1736 generic_find_next_le_bit(buf, blksize*8, 0) == blksize*8)
1737 /* block is empty, punch a hole */
1740 unmap_dblock(imni->mb, buf);
1742 lafs_erase_dblock(imni->mb);
1744 lafs_dirty_dblock(imni->mb);
1746 putdref(imni->mb, MKREF(cfi_map));
1747 mutex_unlock(&ino->i_mutex);
1750 putdref(imni->ib, MKREF(cfi_ino));
/* Abort an in-progress allocation: release the B_Claimed and
 * B_PinPending guards on the inode block, drop the orphan
 * registration made in lafs_new_inode(), and put both block
 * references.  The free bit in the map block was never cleared, so
 * nothing needs to be undone there.
 */
1754 inode_map_new_abort(struct inode_map_new_info *imni)
1757 clear_bit(B_Claimed, &imni->ib->b.flags);
1758 clear_bit(B_PinPending, &imni->ib->b.flags);
1759 lafs_orphan_release(fs_from_inode(imni->ib->b.inode),
1762 putdref(imni->ib, MKREF(cfi_ino));
1764 struct inode *ino = imni->mb->b.inode;
1765 putdref(imni->mb, MKREF(cfi_map));
1771 lafs_new_inode(struct fs *fs, struct super_block *sb, struct inode *dir,
1772 int type, int inum, int mode, struct datablock **inodbp)
1774 /* allocate and instantiate a new inode. If inum is zero,
1775 * choose any free number; otherwise we are creating a special
1776 * inode and must use the given number.
1777 * This creation is committed independently of any name that might
1778 * subsequently be given to the inode. So we register it as an
1779 * orphan so that it will be cleaned up if the name isn't
1780 * successfully created
1784 struct datablock *b;
1785 struct inode_map_new_info imni;
1786 struct update_handle ui;
/* Prepare all pieces before taking the checkpoint lock: the inum
 * claim, a log-cluster slot for the new la_inode, and the orphan
 * registration. */
1789 err = inode_map_new_prepare(fs, inum, sb, &imni);
1790 err = lafs_cluster_update_prepare(&ui, fs, sizeof(struct la_inode))
1793 err = lafs_make_orphan(fs, imni.ib);
1797 lafs_checkpoint_lock(fs);
1799 err = inode_map_new_pin(&imni);
/* -EAGAIN: a checkpoint is in the way; wait it out and retry. */
1801 if (err == -EAGAIN) {
1802 lafs_checkpoint_unlock_wait(fs);
1808 b = getdref(imni.ib, MKREF(inode_new));
1810 lafs_iolock_block(&b->b); /* make sure we don't race with the cleaner
1811 * and zero this inode while trying to load it
1813 lafs_inode_init(b, type, mode, dir);
1814 lafs_iounlock_block(&b->b);
/* Point of no return: clear the free bit, then instantiate the VFS
 * inode for the freshly initialised block. */
1816 inode_map_new_commit(&imni);
1817 ino = lafs_iget(sb, b->b.fileaddr, SYNC);
1819 lafs_cluster_update_abort(&ui);
1822 lafs_cluster_update_commit(&ui, b, 0,
1823 LAFSI(ino)->metadata_size);
1824 LAFS_BUG(LAFSI(ino)->dblock != b, &b->b);
1825 LAFS_BUG(b->my_inode != ino, &b->b);
1826 lafs_checkpoint_unlock(fs);
1831 putdref(b, MKREF(inode_new));
1835 lafs_checkpoint_unlock(fs);
/* Error path: unwind the claim, the cluster slot and the orphan. */
1838 inode_map_new_abort(&imni);
1839 lafs_cluster_update_abort(&ui);
1840 dprintk("After abort %d: %s\n", err, strblk(&imni.ib->b));
1841 return ERR_PTR(err);
/* Mark inode number 'inum' as free again: set its bit in the inode
 * map (inode 1; set bit == free).  The map block is pinned in
 * ReleaseSpace phase as this releases storage rather than consuming
 * it.  Serialised by the map inode's i_mutex.
 */
1844 static int inode_map_free(struct fs *fs, struct super_block *sb, u32 inum)
1846 struct inode *im = lafs_iget(sb, 1, SYNC);
1849 struct datablock *b;
1853 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Split inum into map-block number and bit-within-block. */
1855 bnum = inum >> (3 + sb->s_blocksize_bits);
1856 bit = inum - (bnum << (3 + sb->s_blocksize_bits));
1857 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1859 mutex_unlock(&im->i_mutex);
1863 err = lafs_read_block(b);
1865 putdref(b, MKREF(inode_map_free));
1866 mutex_unlock(&im->i_mutex);
1870 lafs_iolock_written(&b->b);
1871 set_bit(B_PinPending, &b->b.flags);
1872 lafs_iounlock_block(&b->b);
1874 lafs_checkpoint_lock(fs);
1875 err = lafs_pin_dblock(b, ReleaseSpace);
1876 if (err == -EAGAIN) {
1877 lafs_checkpoint_unlock_wait(fs);
1881 buf = map_dblock(b);
1882 generic___set_le_bit(bit, buf);
1883 unmap_dblock(b, buf);
1884 lafs_dirty_dblock(b);
1885 putdref(b, MKREF(inode_map_free));
1886 lafs_checkpoint_unlock(fs);
1887 mutex_unlock(&im->i_mutex);
1892 int lafs_inode_inuse(struct fs *fs, struct super_block *sb, u32 inum)
1894 /* This is used during roll-forward to register a newly created
1895 * inode in the inode map
1897 struct inode *im = lafs_iget(sb, 1, SYNC);
1900 struct datablock *b;
1904 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Split inum into map-block number and bit-within-block, mirroring
 * inode_map_free(). */
1906 bnum = inum >> (3 + sb->s_blocksize_bits);
1907 bit = inum - (bnum << (3 + sb->s_blocksize_bits));
/* bnum == size is allowed (extends the map below); anything beyond
 * that cannot come from a valid log record. */
1908 if (bnum > LAFSI(im)->md.inodemap.size) {
1909 /* inum is unbelievably big */
1910 mutex_unlock(&im->i_mutex);
1914 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1916 mutex_unlock(&im->i_mutex);
1921 err = lafs_read_block(b);
1923 putdref(b, MKREF(inode_map_free));
1924 mutex_unlock(&im->i_mutex);
1929 lafs_iolock_written(&b->b);
1930 set_bit(B_PinPending, &b->b.flags);
1931 lafs_iounlock_block(&b->b);
1933 lafs_checkpoint_lock(fs);
/* CleanSpace phase here (vs ReleaseSpace in inode_map_free): this
 * runs during roll-forward -- see lafs_pin_dblock semantics. */
1934 err = lafs_pin_dblock(b, CleanSpace);
1935 if (err == -EAGAIN) {
1936 lafs_checkpoint_unlock_wait(fs);
1940 buf = map_dblock(b);
1941 if (bnum == LAFSI(im)->md.inodemap.size) {
1942 /* need to add a new block to the file */
1943 memset(buf, 0xff, fs->blocksize);
1944 LAFSI(im)->md.inodemap.size = bnum + 1;
1945 lafs_dirty_inode(im);
/* Clear the bit: mark this inum as allocated (set bit == free). */
1947 generic___clear_le_bit(bit, buf);
1948 unmap_dblock(b, buf);
1949 lafs_dirty_dblock(b);
1950 putdref(b, MKREF(inode_map_free));
1951 lafs_checkpoint_unlock(fs);
1952 mutex_unlock(&im->i_mutex);
/* VFS ->setattr: validate the attribute change, pin the inode's data
 * block so the updated inode can be written, then apply the change
 * via inode_setattr() under the checkpoint lock.
 */
1959 int lafs_setattr(struct dentry *dentry, struct iattr *attr)
1962 struct inode *ino = dentry->d_inode;
1963 struct fs *fs = fs_from_inode(ino);
1964 struct datablock *db;
1966 err = inode_change_ok(ino, attr);
1967 db = lafs_inode_dblock(ino, SYNC, MKREF(setattr));
1973 /* We don't need iolock_written here as we don't
1974 * actually change the inode block yet
1976 lafs_iolock_block(&db->b);
1977 set_bit(B_PinPending, &db->b.flags);
1978 lafs_iounlock_block(&db->b);
1980 /* FIXME quota stuff */
1983 lafs_checkpoint_lock(fs);
1984 err = lafs_pin_dblock(db, ReleaseSpace);
/* -EAGAIN: checkpoint in progress; wait and retry the pin. */
1985 if (err == -EAGAIN) {
1986 lafs_checkpoint_unlock_wait(fs);
1989 /* inode_setattr calls lafs_dirty_inode, which sets
1990 * I_Dirty so the dblock will get updated.
1992 err = err ?: inode_setattr(ino, attr);
1994 lafs_dirty_dblock(db);
1995 clear_bit(B_PinPending, &db->b.flags);
1996 putdref(db, MKREF(setattr));
1997 lafs_checkpoint_unlock(fs);
2002 void lafs_truncate(struct inode *ino)
2004 /* Want to truncate this file.
2005 * i_size has already been changed, and the address space
2006 * has been cleaned up.
2007 * So just start the background truncate
2009 struct fs *fs = fs_from_inode(ino);
2010 struct datablock *db = lafs_inode_dblock(ino, SYNC, MKREF(trunc));
/* First block that survives the truncate (round i_size up). */
2017 trunc_block = ((i_size_read(ino) + fs->blocksize - 1)
2018 >> fs->blocksize_bits);
2019 /* We hold i_mutex, so regular orphan processing cannot
2020 * continue - we have to push it forward ourselves.
2022 while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
2023 LAFSI(ino)->trunc_next < trunc_block) {
2024 prepare_to_wait(&fs->async_complete, &wq,
2025 TASK_UNINTERRUPTIBLE);
2026 lafs_inode_handle_orphan(db);
2027 if (test_bit(B_Orphan, &db->b.flags))
2030 finish_wait(&fs->async_complete, &wq);
2032 /* There is nothing we can do about errors here. The
2033 * most likely are ENOMEM which itself is very unlikely.
2034 * If this doesn't get registered as an orphan .... maybe
2035 * it will have to wait until something else truncates it.
2037 lafs_make_orphan(fs, db);
/* Arm the background truncate: bump trunc_gen on truncate-to-zero
 * and record where the orphan processing should resume. */
2039 if (!test_and_set_bit(I_Trunc, &LAFSI(ino)->iflags))
2041 if (trunc_block == 0)
2042 LAFSI(ino)->trunc_gen++;
2043 LAFSI(ino)->trunc_next = trunc_block;
2044 putdref(db, MKREF(trunc));
2047 const struct inode_operations lafs_special_ino_operations = {
2048 .setattr = lafs_setattr,
2049 .getattr = lafs_getattr,
2050 .truncate = lafs_truncate,