4 * Copyright (C) 2005-2009
5 * Neil Brown <neilb@suse.de>
6 * Released under the GPL, version 2
8 * generic inode handling
13 #include <linux/random.h>
14 #include <linux/delay.h>
15 #include <linux/slab.h>
17 /* Supporting an async 'iget' - as required by the cleaner -
18 * is slightly non-trivial.
19 * iget*_locked will normally wait for any inode with one
20 * of the flags I_FREEING I_CLEAR I_WILL_FREE I_NEW
21 * to either be unhashed or have the flag cleared.
22 * We cannot afford that wait in the cleaner as we could deadlock.
23 * So we use iget5_locked and provide a test function that fails
24 * if it finds the inode with any of those flags set.
25 * If it does see the inode like that it clears the inum
26 * that is passed in (by reference) so that it knows to continue
27 * failing (for consistency) and so that the 'set' function
28 * we provide can know to fail the 'set'.
29 * The result of this is that if iget finds an inode it would
30 * have to wait on, the inum is cleared and NULL is returned.
31 * An unfortunate side effect is that an inode will be allocated
32 * and then destroyed to no avail.
33 * This is avoided by calling ilookup5 first. This also allows
34 * us to only allocate/load the data block if there really seems
37 #define NO_INO (~(ino_t)0)
/*
 * async_itest - 'test' callback passed to ilookup5/iget5_locked.
 * 'data' points at the inum being looked up (see design note above and
 * async_iset below).  Refuses to match an inode that is being freed,
 * clearing the caller's inum to NO_INO so the paired 'set' also fails.
 * NOTE(review): extract is incomplete - intermediate lines are elided,
 * so the bodies of the conditionals are not visible here.
 */
38 static int async_itest(struct inode *inode, void *data)
44 /* found and is freeing */
46 if (inode->i_ino != inum)
48 if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE|I_NEW)) {
/*
 * async_iset - 'set' callback for iget5_locked: installs the inum into
 * the freshly allocated inode.  Per the design note above, it is
 * expected to fail when the inum was cleared to NO_INO by async_itest.
 * NOTE(review): extract is incomplete - the failure path is elided.
 */
55 static int async_iset(struct inode *inode, void *data)
60 inode->i_ino = *inump;
/*
 * lafs_iget - find, and load from the inode file if needed, inode 'inum'
 * in 'sb'.  'async' selects the cleaner-safe non-blocking path that
 * pre-loads the datablock (so we never hit -EAGAIN while holding an
 * I_NEW inode) and uses ilookup5/iget5_locked with async_itest/async_iset
 * to avoid blocking on freeing inodes.  The sync path uses iget_locked
 * and lafs_read_block.  Returns an inode or ERR_PTR().
 * NOTE(review): extract is incomplete - error/branch lines are elided.
 */
65 lafs_iget(struct super_block *sb, ino_t inum, int async)
67 /* find, and load if needed, this inum */
68 struct inode *ino = NULL;
70 struct datablock *b = NULL;
71 struct inode *inodefile;
75 BUG_ON(inum == NO_INO);
81 /* We cannot afford to block on 'freeing_inode'
82 * So use iget5_locked and refuse to match such
84 * If the inode is 'freeing', inum gets set to NO_INO.
85 * ilookup5 is used first to avoid an unnecessary
86 * alloc/free if the inode is locked in some way.
91 ino = ilookup5(sb, inum, async_itest, &inum2);
98 /* For async we will always want the dblock loaded,
99 * and we need to load it first as we cannot afford
100 * to fail -EAGAIN once we have an I_NEW inode.
103 b = lafs_get_block(inodefile, inum, NULL,
104 GFP_NOFS, MKREF(iget));
106 return ERR_PTR(-ENOMEM);
109 err = lafs_read_block_async(b);
112 /* Have the block, so safe to iget */
114 ino = iget5_locked(sb, inum,
115 async_itest, async_iset,
/* B_Async already set: another async reference exists; drop ours */
125 if (test_and_set_bit(B_Async, &b->b.flags)) {
126 putdref(b, MKREF(iget));
129 getdref(b, MKREF(async));
/* ---- synchronous path ---- */
133 ino = iget_locked(sb, inum);
136 putdref(b, MKREF(iget));
137 return ERR_PTR(-ENOMEM);
140 if (!(ino->i_state & I_NEW)) {
141 putdref(b, MKREF(iget));
145 return ERR_PTR(-ENOENT);
148 /* Need to load block 'inum' from an inode file...
151 b = lafs_get_block(inodefile, inum, NULL, GFP_KERNEL, MKREF(iget));
155 err = lafs_read_block(b);
160 oldino = rcu_my_inode(b);
162 /* The inode is new, but the block thinks it has an
163 * old inode, so we must be in the process of destroying
165 * So fail the lookup without even looking at the content
166 * of the block (Which might not be clear yet).
168 spin_lock(&oldino->i_data.private_lock);
169 if (!test_bit(I_Deleting, &LAFSI(oldino)->iflags)) {
171 LAFSI(oldino)->dblock = NULL;
172 LAFSI(oldino)->iblock = NULL;
174 spin_unlock(&oldino->i_data.private_lock);
182 err = lafs_import_inode(ino, b);
185 printk("lafs_import_inode failed %d\n", err);
188 unlock_new_inode(ino);
/* wake the cleaner thread if an async waiter was registered on b */
190 if (b && test_and_clear_bit(B_Async, &b->b.flags)) {
191 putdref(b, MKREF(async));
192 lafs_wake_thread(fs_from_sb(sb));
194 putdref(b, MKREF(iget));
198 unlock_new_inode(ino);
/*
 * lafs_iget_fs - look up inode 'inum' in subordinate fileset 'fsnum' of
 * 'fs'.  For a non-zero fsnum this loads the fileset's InodeFile inode,
 * obtains (or activates) the subset superblock via lafs_get_subset_sb,
 * then does a lafs_iget within it; fsnum 0 uses the main superblock.
 * NOTE(review): extract is incomplete - error paths are elided.
 */
205 lafs_iget_fs(struct fs *fs, int fsnum, int inum, int async)
207 struct super_block *sb;
213 /* Need to locate or load the superblock for this
214 * subordinate filesystem
216 struct inode *filesys;
217 struct super_block *sb2;
219 filesys = lafs_iget(sb, fsnum, async);
/* the fileset inode must itself be an InodeFile */
222 if (LAFSI(filesys)->type != TypeInodeFile) {
224 return ERR_PTR(-ENOENT);
226 /* FIXME can get_subset_sb be async at all?? */
227 sb2 = lafs_get_subset_sb(filesys);
230 return ERR_PTR(PTR_ERR(sb2));
232 rv = lafs_iget(sb2, inum, async);
234 deactivate_locked_super(sb2);
236 up_write(&sb2->s_umount);
238 rv = lafs_iget(sb, inum, async);
239 atomic_inc(&sb->s_active);
/*
 * lafs_import_inode - fill the in-memory inode 'ino' from the on-disk
 * la_inode stored in datablock 'b'.  Decodes the common header
 * (block counts, generation, flags, type, metadata_size, depth) and the
 * per-type metadata union (fs / inodemap / segmentusage / quota /
 * orphan / file / dir / special), then links b->my_inode to ino.
 * All on-disk multi-byte fields are little-endian (le*_to_cpu).
 * NOTE(review): extract is incomplete - switch labels, returns and some
 * branch bodies are elided.
 */
245 lafs_import_inode(struct inode *ino, struct datablock *b)
247 struct la_inode *lai = map_dblock(b);
248 struct lafs_inode *li = LAFSI(ino);
/* filetype 0 means "no inode here": present as an empty regular file */
251 if (lai->filetype == 0) {
258 ino->i_mode = S_IFREG;
259 ino->i_nlink = 1; /* For special file, set nlink so they
260 * never appear unlinked */
264 LAFS_BUG(ino->i_ino != b->b.fileaddr, &b->b);
265 li->cblocks = le32_to_cpu(lai->data_blocks);
266 li->pblocks = li->ablocks = 0;
/* i_blocks counts 512-byte sectors, hence the (blocksize_bits - 9) shift */
267 li->vfs_inode.i_blocks = ((blkcnt_t)li->cblocks
268 << (ino->i_sb->s_blocksize_bits - 9));
269 li->ciblocks = le32_to_cpu(lai->index_blocks);
273 ino->i_generation = le16_to_cpu(lai->generation);
274 li->trunc_gen = lai->trunc_gen;
275 li->flags = lai->flags;
276 li->type = lai->filetype;
277 li->metadata_size = le16_to_cpu(lai->metadata_size);
278 li->depth = lai->depth;
280 dprintk("inode %lu type is %d\n", (unsigned long)ino->i_ino, li->type);
282 ino->i_data.a_ops = &lafs_file_aops;
/* ---- TypeInodeFile (a fileset): decode fs metadata ---- */
288 struct fs_md *i = &li->md.fs;
289 struct fs_metadata *l = &lai->metadata[0].fs;
292 i->usagetable = le16_to_cpu(l->snapshot_usage_table);
293 decode_time(&ino->i_mtime, le64_to_cpu(l->update_time));
294 i->cblocks_used = le64_to_cpu(l->blocks_used);
295 i->pblocks_used = i->ablocks_used = 0;
296 i->blocks_allowed = le64_to_cpu(l->blocks_allowed);
297 i->blocks_unalloc = 0;
298 i->creation_age = le64_to_cpu(l->creation_age);
299 i->inodes_used = le32_to_cpu(l->inodes_used);
300 i->quota_inums[0] = le32_to_cpu(l->quota_inodes[0]);
301 i->quota_inums[1] = le32_to_cpu(l->quota_inodes[1]);
302 i->quota_inums[2] = le32_to_cpu(l->quota_inodes[2]);
303 i->quota_inodes[0] = i->quota_inodes[1]
304 = i->quota_inodes[2] = NULL;
305 nlen = li->metadata_size - offsetof(struct la_inode,
306 metadata[0].fs.name);
312 /* Need to unmap the dblock to kmalloc because
313 * the mapping makes us 'atomic'
315 unmap_dblock(b, lai);
316 i->name = kmalloc(nlen+1, GFP_KERNEL);
318 l = &lai->metadata[0].fs;
323 memcpy(i->name, l->name, nlen);
326 /* Make this look like a directory */
327 ino->i_mode = S_IFDIR;
331 ino->i_op = &lafs_subset_ino_operations;
332 ino->i_fop = &lafs_subset_file_operations;
/* ---- TypeInodeMap ---- */
338 struct inodemap_md *m = &li->md.inodemap;
339 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
340 m->size = le32_to_cpu(s->size);
341 m->thisblock = NoBlock;
/* ---- TypeSegmentMap ---- */
348 struct su_md *m = &li->md.segmentusage;
349 struct su_metadata *s = &lai->metadata[0].segmentusage;
350 m->table_size = le32_to_cpu(s->table_size);
/* ---- TypeQuota ---- */
356 struct quota_md *m = &li->md.quota;
357 struct quota_metadata *s = &lai->metadata[0].quota;
358 m->gracetime = le32_to_cpu(s->gracetime);
359 m->graceunits = le32_to_cpu(s->graceunits);
/* ---- TypeOrphanList ---- */
364 struct orphan_md *m = &li->md.orphan;
365 /* This will be set via lafs_count_orphans */
373 default: /* TypeBase or larger */
375 struct file_md *i = &li->md.file;
376 struct file_metadata *l = &lai->metadata[0].file;
377 struct dir_metadata *d = &lai->metadata[0].dir;
378 struct special_metadata *s = &lai->metadata[0].special;
380 if (li->type < TypeBase)
382 i->flags = le16_to_cpu(l->flags);
383 ino->i_mode = le16_to_cpu(l->mode);
384 ino->i_uid = le32_to_cpu(l->userid);
385 ino->i_gid = le32_to_cpu(l->groupid);
386 i->treeid = le32_to_cpu(l->treeid);
387 i->creationtime = le64_to_cpu(l->creationtime);
388 decode_time(&ino->i_mtime, le64_to_cpu(l->modifytime));
389 decode_time(&ino->i_ctime, le64_to_cpu(l->ctime));
390 decode_time(&i->i_accesstime, le64_to_cpu(l->accesstime));
391 ino->i_atime = i->i_accesstime; /* FIXME load from
393 ino->i_size = le64_to_cpu(l->size);
394 i->parent = le32_to_cpu(l->parent);
395 ino->i_nlink = le32_to_cpu(l->linkcount);
/* nlink==0 after roll-forward should imply the block is already orphaned */
396 if (ino->i_nlink == 0 && list_empty(&b->orphans) &&
397 fs_from_inode(ino)->rolled) {
398 /* This block should already be on the orphan
399 * list, otherwise there is a filesystem
401 * Either the orphan file is wrong, or the
402 * linkcount is wrong.
403 * It is safest to assume the latter - either
404 * way an FS check would be needed to fix it.
405 * Note: while roll-forward is happening, this
406 * situation is perfectly possible and is handled
409 /* FIXME set a superblock flag requesting
410 * directory linkage checking
415 dprintk(" mode = 0%o uid %d size %lld\n",
416 ino->i_mode, ino->i_uid, ino->i_size);
/* per-subtype vfs ops and S_IF* bits */
419 ino->i_op = &lafs_file_ino_operations;
420 ino->i_fop = &lafs_file_file_operations;
421 ino->i_mode = (ino->i_mode & 07777) | S_IFREG;
424 i->seed = le32_to_cpu(d->hash_seed);
425 ino->i_op = &lafs_dir_ino_operations;
426 ino->i_fop = &lafs_dir_file_operations;
427 ino->i_mode = (ino->i_mode & 07777) | S_IFDIR;
430 dprintk("Hmm. %d %d %d\n",
437 ino->i_op = &lafs_link_ino_operations;
438 ino->i_mode = (ino->i_mode & 07777) | S_IFLNK;
441 /* the data had better be in the inode ... */
442 ino->i_rdev = MKDEV(le32_to_cpu(s->major),
443 le32_to_cpu(s->minor));
444 ino->i_op = &lafs_special_ino_operations;
445 init_special_inode(ino, ino->i_mode, ino->i_rdev);
452 ino->i_blkbits = ino->i_sb->s_blocksize_bits;
453 /* FIXME i_blocks and i_byte - used for quota?? */
456 /* Note: no refcount yet. Either will remove the reference to the
460 rcu_assign_pointer(b->my_inode, ino);
464 printk("inode %lu type is %d\n",
465 (unsigned long)ino->i_ino, li->type);
466 unmap_dblock(b, lai);
/*
 * lafs_inode_checkpin - reconcile the I_Pinned flag with reality:
 * set iff i_nlink != 0 and the inode's iblock is B_Pinned; the flag
 * represents an inode reference we own.  Called after i_nlink changes
 * and after pinning/unpinning an InoIdx block.
 * NOTE(review): extract is incomplete - the igrab/iput calls implied by
 * "we own a reference" are on elided lines.
 */
470 void lafs_inode_checkpin(struct inode *ino)
472 /* Make sure I_Pinned is set correctly.
473 * It should be set precisely if i_nlink is non-zero,
474 * and ->iblock is B_Pinned.
475 * When it is set, we own a reference to the inode.
477 * This needs to be called whenever we change
478 * i_nlink, and whenever we pin or unpin an InoIdx
481 if (ino->i_nlink == 0) {
482 /* I_Pinned should not be set */
483 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags))
486 /* Need to check if iblock is Pinned. */
487 struct indexblock *ib = NULL;
/* ->iblock is re-read under the private_lock before trusting it */
488 if (LAFSI(ino)->iblock) {
489 spin_lock(&ino->i_data.private_lock);
490 ib = LAFSI(ino)->iblock;
491 if (ib && !test_bit(B_Pinned, &ib->b.flags))
493 spin_unlock(&ino->i_data.private_lock);
496 if (!test_and_set_bit(I_Pinned, &LAFSI(ino)->iflags))
499 if (test_and_clear_bit(I_Pinned, &LAFSI(ino)->iflags))
/*
 * lafs_inode_get_dblock - return the inode's cached ->dblock with a new
 * reference (REFARG/REF is the refcount-tracking tag), or NULL if unset.
 * Taken under ino's i_data.private_lock; when the dblock belongs to a
 * different inode (the inode-file), its own private_lock is taken with
 * lock class 1 (spin_lock_nested) to order the two locks safely.
 */
505 struct datablock *lafs_inode_get_dblock(struct inode *ino, REFARG)
507 struct datablock *db;
509 spin_lock(&ino->i_data.private_lock);
510 db = LAFSI(ino)->dblock;
512 if (db->b.inode == ino)
513 getdref_locked(db, REF);
515 spin_lock_nested(&db->b.inode->i_data.private_lock, 1);
516 getdref_locked(db, REF);
517 spin_unlock(&db->b.inode->i_data.private_lock);
520 spin_unlock(&ino->i_data.private_lock);
/*
 * lafs_inode_dblock - like lafs_inode_get_dblock, but creates and reads
 * the datablock (from the superblock's inode file) when the cache is
 * empty, linking it to the inode via ->dblock / ->my_inode.  'async'
 * selects lafs_read_block_async vs the blocking lafs_read_block.
 * Returns the block or ERR_PTR (e.g. -ENOMEM).
 * NOTE(review): extract is incomplete - locking and error lines elided.
 */
524 struct datablock *lafs_inode_dblock(struct inode *ino, int async, REFARG)
526 struct datablock *db;
529 db = lafs_inode_get_dblock(ino, REF);
532 db = lafs_get_block(ino_from_sb(ino->i_sb), ino->i_ino, NULL,
535 return ERR_PTR(-ENOMEM);
537 LAFSI(ino)->dblock = db;
538 rcu_assign_pointer(db->my_inode, ino);
540 err = lafs_read_block_async(db);
542 err = lafs_read_block(db);
/*
 * lafs_inode_init - initialise the on-disk la_inode image in a freshly
 * allocated inode-file datablock 'b', so a later lafs_import_inode (via
 * iget) sees a consistent new inode of 'type' with mode 'mode' and
 * parent directory 'dir' (may be NULL).  Fills the type-specific
 * metadata union, zeroes the remainder of the block, plants the initial
 * IBLK_EXTENT index marker, then marks the block valid and dirty.
 *
 * Fix: metadata_size was stored with cpu_to_le32() although it is a
 * 16-bit little-endian field everywhere else (le16_to_cpu in
 * lafs_import_inode, cpu_to_le16 in lafs_inode_fillblock).  On a
 * little-endian CPU the truncation happened to be harmless, but on
 * big-endian it stored the wrong bytes.  Use cpu_to_le16().
 */
550 void lafs_inode_init(struct datablock *b, int type, int mode, struct inode *dir)
552 /* A new block has been allocated in an inode file to hold an
553 * inode. We get to fill in initial values so that when
554 * 'iget' calls lafs_import_inode, the correct inode is
557 struct fs *fs = fs_from_inode(b->b.inode);
558 struct la_inode *lai = map_dblock(b);
561 lai->data_blocks = cpu_to_le32(0);
562 lai->index_blocks = cpu_to_le32(0);
563 get_random_bytes(&lai->generation, sizeof(lai->generation));
566 lai->filetype = type;
/* ---- TypeInodeFile ---- */
572 struct fs_metadata *l = &lai->metadata[0].fs;
573 size = sizeof(struct fs_metadata);
576 l->blocks_allowed = 0;
577 l->creation_age = fs->wc[0].cluster_seq;
579 l->quota_inodes[0] = 0;
580 l->quota_inodes[1] = 0;
581 l->quota_inodes[2] = 0;
582 l->snapshot_usage_table = 0;
584 /* name will be zero length and not used */
589 struct inodemap_metadata *l = &lai->metadata[0].inodemap;
591 size = sizeof(struct inodemap_metadata);
595 size = sizeof(struct su_metadata);
598 size = sizeof(struct quota_metadata);
/* ---- TypeBase and above: regular file / dir / symlink / special ---- */
608 struct file_metadata *l = &lai->metadata[0].file;
609 struct timespec now = CURRENT_TIME;
611 l->flags = cpu_to_le16(0);
612 l->userid = cpu_to_le32(current->cred->fsuid);
/* setgid directory: child inherits the group */
613 if (dir && (dir->i_mode & S_ISGID)) {
614 l->groupid = cpu_to_le32(dir->i_gid);
618 l->groupid = cpu_to_le32(current->cred->fsgid);
619 if (dir && LAFSI(dir)->md.file.treeid)
620 l->treeid = cpu_to_le32(LAFSI(dir)->md.file.treeid);
622 l->treeid = l->userid;
624 l->mode = cpu_to_le16(mode);
625 l->creationtime = encode_time(&now);
626 l->modifytime = l->creationtime;
627 l->ctime = l->creationtime;
628 l->accesstime = l->creationtime;
630 l->parent = dir ? cpu_to_le32(dir->i_ino) : 0;
633 if (type == TypeDir) {
634 struct dir_metadata *l = &lai->metadata[0].dir;
/* hash seed: random, low 3 bits forced to 001 */
636 get_random_bytes(&seed,
638 seed = (seed & ~7) | 1;
639 l->hash_seed = cpu_to_le32(seed);
640 size = sizeof(struct dir_metadata);
641 } else if (type == TypeSpecial) {
642 struct special_metadata *s = &lai->metadata[0].special;
643 s->major = s->minor = 0;
644 size = sizeof(struct special_metadata);
646 size = sizeof(struct file_metadata);
649 size += sizeof(struct la_inode);
/* metadata_size is a __le16 on disk (see le16_to_cpu in import) */
650 lai->metadata_size = cpu_to_le16(size);
651 memset(((char *)lai)+size, 0, fs->blocksize-size);
652 *(u16 *)(((char *)lai)+size) = cpu_to_le16(IBLK_EXTENT);
654 unmap_dblock(b, lai);
655 set_bit(B_Valid, &b->b.flags);
656 LAFS_BUG(!test_bit(B_Pinned, &b->b.flags), &b->b);
657 lafs_dirty_dblock(b);
/*
 * lafs_clear_inode - called when the VFS drops the in-memory inode.
 * Breaks the inode<->dblock linkage (dblock/iblock pointers) under the
 * private_lock, but only when the file is NOT being deleted - the
 * I_Deleting path still needs the linkage.
 * NOTE(review): extract is incomplete - the dblock NULL-check branch
 * and reference drops are on elided lines.
 */
660 void lafs_clear_inode(struct inode *ino)
662 struct lafs_inode *li = LAFSI(ino);
663 dprintk("CLEAR INODE %d\n", (int)ino->i_ino);
667 /* Now is a good time to break the linkage between
668 * inode and dblock - but not if the file is
671 if (!test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
672 struct datablock *db;
673 spin_lock(&ino->i_data.private_lock);
674 db = LAFSI(ino)->dblock;
676 struct indexblock *ib = LAFSI(ino)->iblock;
/* nobody may still hold a reference to the iblock at this point */
677 LAFS_BUG(ib && atomic_read(&ib->b.refcnt), &db->b);
679 LAFSI(ino)->dblock = NULL;
680 LAFSI(ino)->iblock = NULL;
682 spin_unlock(&ino->i_data.private_lock);
685 /* FIXME release quota inodes if filesystem */
688 static int inode_map_free(struct fs *fs, struct super_block *sb, u32 inum);
/*
 * lafs_delete_inode - VFS delete_inode hook: the last link and last
 * reference are gone, so reclaim the space.  Truncates the pages,
 * marks the inode I_Deleting|I_Trunc, claims the inode's datablock onto
 * the orphan list (the orphan machinery does the actual erase later -
 * see lafs_inode_handle_orphan), then frees the inode number back to
 * the inode map.  i_mode == 0 means no inode ever existed here.
 * NOTE(review): extract is incomplete - some lines are elided.
 */
690 void lafs_delete_inode(struct inode *ino)
692 struct fs *fs = fs_from_inode(ino);
695 if (ino->i_mode == 0) {
696 /* There never was an inode here,
702 dprintk("DELETE INODE %d\n", (int)ino->i_ino);
704 /* Normal truncation holds an igrab, so we cannot be
705 * deleted until any truncation finishes
707 BUG_ON(test_bit(I_Trunc, &LAFSI(ino)->iflags));
709 b = lafs_inode_dblock(ino, SYNC, MKREF(delete_inode));
711 i_size_write(ino, 0);
712 truncate_inode_pages(&ino->i_data, 0);
713 LAFSI(ino)->trunc_next = 0;
714 set_bit(I_Deleting, &LAFSI(ino)->iflags);
715 set_bit(I_Trunc, &LAFSI(ino)->iflags);
719 set_bit(B_Claimed, &b->b.flags);
720 lafs_add_orphan(fs, b);
721 dprintk("PUNCH hole for %d\n", (int)b->b.fileaddr);
722 putdref(b, MKREF(delete_inode));
724 inode_map_free(fs, ino->i_sb, ino->i_ino);
/*
 * prune - lafs_walk_leaf_index callback used when an entire leaf index
 * block is being truncated: release every physical block address
 * [paddr, paddr+len) via lafs_summary_update; the index block itself is
 * cleared by the caller afterwards.  'data' is the owning indexblock.
 */
729 static int prune(void *data, u32 addr, u64 paddr, int len)
731 /* This whole index block is being pruned, just account
732 * for everything and it will be cleared afterwards
734 struct indexblock *ib = data;
735 struct inode *ino = ib->b.inode;
736 struct fs *fs = fs_from_inode(ino);
/* phase bit of the containing index block selects the accounting phase */
737 int ph = !!test_bit(B_Phase1, &ib->b.flags);
739 dprintk("PRUNE %d for %d at %lld\n", addr, len, (long long)paddr);
740 if (paddr == 0 || len == 0)
742 for (i = 0 ; i < len ; i++)
743 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
/*
 * prune_some - lafs_walk_leaf_index callback for partial truncation of
 * a leaf index block: addresses at or beyond trunc_next are copied into
 * the block's uninc_table (as "hole" entries) for later incorporation,
 * and their physical blocks are released via lafs_summary_update.
 * NOTE(review): extract is incomplete; the visible ordering - taking
 * &pending_addr[pending_cnt - 1] BEFORE the capacity check and the
 * pending_cnt++ below - looks suspicious, but intervening lines are
 * elided, so verify against the full source before judging.
 */
747 static int prune_some(void *data, u32 addr, u64 paddr, int len)
749 /* Part of this index block is being pruned. Copy
750 * what addresses we can into uninc_table so that
751 * it can be 'incorporated'
752 * We should probably share some code with
753 * lafs_allocated_block??
755 struct indexblock *ib = data;
756 struct inode *ino = ib->b.inode;
757 struct fs *fs = fs_from_inode(ino);
758 int ph = !!test_bit(B_Phase1, &ib->b.flags);
761 if (paddr == 0 || len == 0)
763 dprintk("PRUNE2 %d for %d at %lld\n", addr, len, (long long)paddr);
764 for (i = 0 ; i < len ; i++) {
765 /* FIXME should allow longer truncation ranges in uninc_table
766 * as they are easy to handle.
/* entries below trunc_next are not being truncated - skip them */
769 if (addr + i < LAFSI(ino)->trunc_next)
771 spin_lock(&ino->i_data.private_lock);
772 a = &ib->uninc_table.pending_addr
773 [ib->uninc_table.pending_cnt - 1];
774 if (ib->uninc_table.pending_cnt <
775 ARRAY_SIZE(ib->uninc_table.pending_addr)) {
777 a->fileaddr = addr + i;
780 LAFS_BUG(!test_bit(B_Pinned, &ib->b.flags), &ib->b);
781 ib->uninc_table.pending_cnt++;
783 spin_unlock(&ino->i_data.private_lock);
786 spin_unlock(&ino->i_data.private_lock);
787 lafs_summary_update(fs, ino, paddr+i, 0, 0, ph, 0);
/*
 * lafs_inode_handle_orphan - one step of orphan processing for the
 * inode behind datablock 'b'.  Three regimes are visible here:
 *  1) I_Trunc clear: the orphan is resolved - erase the dblock if
 *     I_Deleting, otherwise release/forget the orphan entry.
 *  2) trunc_next == 0xFFFFFFFF: truncate proper is done; walk the
 *     pinned index tree top-down and incorporate/write out the
 *     remaining beyond-EOF index blocks until the root unpins.
 *  3) otherwise: find the leaf index block covering trunc_next and
 *     prune it (prune/prune_some + lafs_incorporate), advancing
 *     trunc_next toward 0xFFFFFFFF.
 * Returns 0 or a -ve "retry later" code (e.g. -EBUSY during a
 * checkpoint).  This state machine is tightly coupled to the
 * incorporation and phase logic - comments only, no restructuring.
 * NOTE(review): extract is incomplete - many branch bodies, gotos and
 * returns are on elided lines.
 */
792 int lafs_inode_handle_orphan(struct datablock *b)
794 /* Don't need rcu protection for my_inode run_orphan
797 struct indexblock *ib, *ib2;
798 struct inode *ino = b->my_inode;
799 struct fs *fs = fs_from_inode(ino);
800 u32 trunc_next, next_trunc;
/* ---- regime 1: truncate already finished ---- */
804 if (!test_bit(I_Trunc, &LAFSI(ino)->iflags)) {
805 if (test_bit(I_Deleting, &LAFSI(ino)->iflags)) {
806 LAFS_BUG(ino->i_nlink, &b->b);
/* all block accounting must have reached zero before the erase */
807 if (LAFSI(ino)->cblocks +
808 LAFSI(ino)->pblocks +
809 LAFSI(ino)->ablocks +
810 LAFSI(ino)->ciblocks +
811 LAFSI(ino)->piblocks)
812 printk("Deleting inode %lu: %ld+%ld+%ld %ld+%ld\n",
817 LAFSI(ino)->ciblocks,
818 LAFSI(ino)->piblocks);
819 BUG_ON(LAFSI(ino)->cblocks +
820 LAFSI(ino)->pblocks +
821 LAFSI(ino)->ablocks +
822 LAFSI(ino)->ciblocks +
823 LAFSI(ino)->piblocks);
824 if (lafs_erase_dblock_async(b))
825 lafs_orphan_release(fs, b);
826 } else if (ino->i_nlink || LAFSI(ino)->type == 0)
827 lafs_orphan_release(fs, b);
829 lafs_orphan_forget(fs, b);
833 ib = lafs_make_iblock(ino, ADOPT, SYNC, MKREF(inode_handle_orphan));
837 /* Here is the guts of 'truncate'. We find the next leaf index
838 * block and discard all the addresses there-in.
840 trunc_next = LAFSI(ino)->trunc_next;
/* ---- regime 2: all data gone, drain the remaining index blocks ---- */
842 if (trunc_next == 0xFFFFFFFF) {
843 /* truncate has finished in that all data blocks
844 * have been removed and all index block are either
845 * gone or pending incorporation at which point they will
847 * If we hit a phase change, we will need to postpone
848 * the rest of the cleaning until it completes.
849 * If there is a checkpoint happening, then all the work
850 * that we can do now, it will do for us. So just
853 struct indexblock *tmp;
854 struct indexblock *next;
857 if (!test_bit(B_Pinned, &ib->b.flags)) {
858 /* must be finished */
859 LAFS_BUG(test_bit(B_Dirty, &ib->b.flags), &ib->b);
860 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
862 wake_up(&fs->trunc_wait);
866 if (fs->checkpointing) {
867 /* This cannot happen with current code,
868 * but leave it in case we ever have
869 * orphan handling parallel with checkpoints
871 err = -EBUSY; /* Try again after the checkpoint */
875 lastaddr = (i_size_read(ino) +
877 >> fs->blocksize_bits;
878 /* Find a Pinned descendent of ib which has no
879 * Pinned descendents and no PrimaryRef dependent
880 * (so take the last).
881 * Prefer blocks that are beyond EOF (again, take the last).
882 * If there are none, descend the last block that
883 * is not after EOF and look at its children.
886 spin_lock(&ib->b.inode->i_data.private_lock);
890 list_for_each_entry(tmp, &ib2->children, b.siblings) {
891 if (!test_bit(B_Index, &tmp->b.flags) ||
892 !test_bit(B_Pinned, &tmp->b.flags))
895 tmp->b.fileaddr > next->b.fileaddr)
899 if (ib2->b.fileaddr < lastaddr) {
900 /* Must be all done */
901 spin_unlock(&ib->b.inode->i_data.private_lock);
902 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
904 wake_up(&fs->trunc_wait);
908 getiref(ib2, MKREF(inode_handle_orphan2));
909 spin_unlock(&ib->b.inode->i_data.private_lock);
911 /* ib2 is an index block beyond EOF with no
913 * Incorporating it should unpin it.
915 if (!list_empty(&ib2->children)) {
916 lafs_print_tree(&ib2->b, 3);
917 LAFS_BUG(1, &ib2->b);
920 if (!lafs_iolock_written_async(&ib2->b)) {
921 putiref(ib2, MKREF(inode_handle_orphan2));
925 while (ib2->uninc_table.pending_cnt || ib2->uninc)
926 lafs_incorporate(fs, ib2);
928 if (test_bit(B_Dirty, &ib2->b.flags) ||
929 test_bit(B_Realloc, &ib2->b.flags))
930 lafs_cluster_allocate(&ib2->b, 0);
932 lafs_iounlock_block(&ib2->b);
934 if (!list_empty(&ib2->b.siblings)) {
935 printk("looping on %s\n", strblk(&ib2->b));
940 putiref(ib2, MKREF(inode_handle_orphan2));
943 if (lafs_iolock_written_async(&ib->b)) {
945 lafs_incorporate(fs, ib);
946 lafs_iounlock_block(&ib->b);
951 putiref(ib, MKREF(inode_handle_orphan));
955 putiref(ib, MKREF(inode_handle_orphan));
/* ---- regime 3: prune the leaf covering trunc_next ---- */
957 ib = lafs_leaf_find(ino, trunc_next, ADOPT, &next_trunc,
958 ASYNC, MKREF(inode_handle_orphan3));
961 /* now hold an iolock on ib */
963 /* Ok, trunc_next seems to refer to a block that exists.
964 * We need to erase it..
966 * So we open up the index block ourselves, call
967 * lafs_summary_update with each block address, and then
971 if (LAFSI(ino)->depth == 0) {
972 /* Nothing to truncate */
973 clear_bit(I_Trunc, &LAFSI(ino)->iflags);
975 if (test_bit(B_Pinned, &ib->b.flags))
976 /* Need to move the dirtiness which keeps this
977 * pinned to the data block.
979 lafs_cluster_allocate(&ib->b, 0);
981 lafs_iounlock_block(&ib->b);
986 lafs_checkpoint_lock(fs);
987 err = lafs_reserve_block(&ib->b, ReleaseSpace);
991 if (!test_bit(B_Valid, &ib->b.flags) &&
992 test_bit(B_InoIdx, &ib->b.flags)) {
993 /* still invalid, just re-erase to remove
995 LAFSI(ino)->trunc_next = next_trunc;
996 lafs_cluster_allocate(&ib->b, 0);
1001 lafs_pin_block(&ib->b);
1003 /* It might be that this can happen, in which case
1004 * we simply update trunc_next and loop. But I'd like
1005 * to be sure before I implement that
1007 if (!test_bit(B_Valid, &ib->b.flags)) {
1008 printk("Not Valid: %s\n", strblk(&ib->b));
1009 printk("depth = %d\n", LAFSI(ino)->depth);
1010 if (test_bit(B_InoIdx, &ib->b.flags))
1011 printk("DB: %s\n", strblk(&LAFSI(ib->b.inode)->dblock->b));
1012 LAFSI(ino)->trunc_next = next_trunc;
1013 //BUG_ON(!test_bit(B_Valid, &ib->b.flags));
1018 if (ib->b.fileaddr < trunc_next &&
1019 lafs_leaf_next(ib, 0) < trunc_next) {
1020 /* We only want to truncate part of this index block.
1021 * So we copy addresses into uninc_table and then
1022 * call lafs_incorporate.
1023 * This might cause the index tree to grow, so we
1024 * cannot trust next_trunc
1026 if (ib->uninc_table.pending_cnt == 0 &&
1027 ib->uninc == NULL) {
1028 lafs_dirty_iblock(ib, 0);
1029 /* FIXME this just removes 8 blocks at a time,
1030 * which is not enough
1032 lafs_walk_leaf_index(ib, prune_some, ib);
1034 if (test_bit(B_Dirty, &ib->b.flags))
1035 lafs_incorporate(fs, ib);
1039 LAFSI(ino)->trunc_next = next_trunc;
1041 while (ib->uninc_table.pending_cnt || ib->uninc) {
1042 /* There should be no Realloc data blocks here
1043 * but index blocks might be realloc still.
1045 LAFS_BUG(!test_bit(B_Dirty, &ib->b.flags) &&
1046 !test_bit(B_Realloc, &ib->b.flags), &ib->b);
1047 lafs_incorporate(fs, ib);
1049 if (test_bit(B_InoIdx, &ib->b.flags) ||
1050 !test_bit(B_PhysValid, &ib->b.flags) ||
1051 ib->b.physaddr != 0) {
1052 lafs_walk_leaf_index(ib, prune, ib);
1053 lafs_clear_index(ib);
1054 lafs_dirty_iblock(ib, 0);
1056 if (test_bit(B_Dirty, &ib->b.flags))
1057 lafs_incorporate(fs, ib);
1058 if (!list_empty(&ib->children))
1059 lafs_print_tree(&ib->b, 2);
1060 LAFS_BUG(!list_empty(&ib->children), &ib->b);
1063 lafs_iounlock_block(&ib->b);
1065 lafs_checkpoint_unlock(fs);
1067 putiref(ib, MKREF(inode_handle_orphan3));
/*
 * lafs_dirty_inode - super_operations.dirty_inode hook.  Only records
 * the dirtiness (I_Dirty flag + s_dirt) and propagates an mtime bump to
 * the containing filesystem's InodeFile inode; the on-disk block is
 * refreshed later by cluster_allocate (see comment below for why we
 * must not touch the datablock here).
 */
1071 void lafs_dirty_inode(struct inode *ino)
1073 /* this is called in one of three cases:
1074 * 1/ by lafs internally when dblock or iblock is pinned and
1075 * ready to be dirtied
1076 * 2/ by writeout before requesting a write - to update mtime
1077 * 3/ by read to update atime
1079 * As we don't know which, there is not much we can do.
1080 * We mustn't update the data block as it could be in
1081 * writeout and we cannot always wait safely.
1082 * So require that anyone who really cares, dirties the datablock
1083 * or a child themselves.
1084 * When cluster_allocate eventually gets called, it will update
1085 * the datablock from the inode.
1086 * If an update has to wait for the next phase, lock_dblock
1087 * (e.g. in setattr) will do that.
1089 * We also use this opportunity to update the filesystem modify time.
1091 struct timespec now;
1092 struct inode *filesys;
1093 set_bit(I_Dirty, &LAFSI(ino)->iflags);
1094 ino->i_sb->s_dirt = 1;
1096 now = current_fs_time(ino->i_sb);
1097 filesys = ino_from_sb(ino->i_sb);
1098 if (!timespec_equal(&filesys->i_mtime, &now)) {
1099 filesys->i_mtime = now;
1100 set_bit(I_Dirty, &LAFSI(filesys)->iflags);
/*
 * lafs_sync_inode - fsync support: push any pending inode update into
 * the next write cluster.  update_cluster values: 0 = nothing pending,
 * 1 = waiting for a checkpoint, >1 = a cluster sequence number to wait
 * for.  When I_Dirty is set, the inode image is refreshed into its
 * dblock (lafs_inode_fillblock) and committed via the cluster-update
 * prepare/pin/commit/abort protocol; if that cannot proceed, fall back
 * to scheduling a checkpoint.  'wait' parameter semantics not visible
 * in this extract.
 * NOTE(review): extract is incomplete - labels/returns are elided.
 */
1104 int lafs_sync_inode(struct inode *ino, int wait)
1106 /* fsync has been called on this file so we need
1107 * to sync any inode updates to the next cluster.
1109 * If we cannot create an update record,
1110 * we wait for a phase change, which writes everything
1113 struct datablock *b;
1114 struct fs *fs = fs_from_inode(ino);
1115 struct update_handle uh;
1119 if (LAFSI(ino)->update_cluster > 1)
1120 lafs_cluster_wait(fs, LAFSI(ino)->update_cluster);
1121 if (LAFSI(ino)->update_cluster == 1) {
1122 lafs_checkpoint_lock(fs);
1123 lafs_checkpoint_unlock_wait(fs);
1128 LAFSI(ino)->update_cluster = 0;
1129 if (!test_bit(I_Dirty, &LAFSI(ino)->iflags))
1131 b = lafs_inode_dblock(ino, SYNC, MKREF(write_inode));
/* refresh the on-disk image under the block's io lock */
1135 lafs_iolock_written(&b->b);
1136 lafs_inode_fillblock(ino);
1137 lafs_iounlock_block(&b->b);
1139 err = lafs_cluster_update_prepare(&uh, fs, LAFS_INODE_LOG_SIZE);
1141 lafs_cluster_update_abort(&uh);
1143 lafs_checkpoint_lock(fs);
1144 if (lafs_cluster_update_pin(&uh) == 0) {
/* the update record supersedes the dirty block; return its space */
1145 if (test_and_clear_bit(B_Dirty, &b->b.flags))
1146 lafs_space_return(fs, 1);
1147 LAFSI(ino)->update_cluster =
1148 lafs_cluster_update_commit
1149 (&uh, b, LAFS_INODE_LOG_START,
1150 LAFS_INODE_LOG_SIZE);
1152 lafs_cluster_update_abort(&uh);
1153 lafs_checkpoint_unlock(fs);
1155 if (test_bit(B_Dirty, &b->b.flags)) {
1156 /* FIXME need to write out the data block...
1157 * Is that just lafs_cluster_allocate ?
1161 if (LAFSI(ino)->update_cluster == 0) {
1162 lafs_checkpoint_lock(fs);
1163 if (test_bit(B_Dirty, &b->b.flags))
1164 LAFSI(ino)->update_cluster = 1;
1165 lafs_checkpoint_start(fs);
1166 lafs_checkpoint_unlock(fs);
1168 putdref(b, MKREF(write_inode));
1169 return 0; /* FIXME should I return some error message??? */
/*
 * lafs_inode_fillblock - inverse of lafs_import_inode: serialise the
 * in-memory inode back into its on-disk la_inode image in ->dblock.
 * Clears I_Dirty, writes the common header, then the per-type metadata
 * union.  All multi-byte on-disk fields are little-endian (cpu_to_le*).
 * Caller must hold the block's io lock (see lafs_sync_inode).
 * NOTE(review): extract is incomplete - switch labels and some branch
 * bodies are elided.
 */
1172 void lafs_inode_fillblock(struct inode *ino)
1174 /* copy data from ino into the related data block */
1176 struct lafs_inode *li = LAFSI(ino);
1177 struct datablock *db = li->dblock;
1178 struct la_inode *lai;
1180 clear_bit(I_Dirty, &LAFSI(ino)->iflags);
1182 lai = map_dblock(db);
1183 lai->data_blocks = cpu_to_le32(li->cblocks);
1184 lai->index_blocks = cpu_to_le32(li->ciblocks);
1185 lai->generation = cpu_to_le16(ino->i_generation);
1186 lai->trunc_gen = li->trunc_gen;
1187 lai->flags = li->flags;
1188 lai->filetype = li->type;
1189 if (lai->metadata_size != cpu_to_le16(li->metadata_size)) {
1190 /* Changing metadata size is weird.
1191 * We will need to handle this somehow for xattrs
1192 * For now we just want to cope with
1193 * Dir -> InodeFile changes, and that guarantees us
1194 * there is no index info - so just clear the index
1197 u16 *s = (u16*)(((char*)lai) + li->metadata_size);
1198 BUG_ON(li->type != TypeInodeFile);
1199 lai->metadata_size = cpu_to_le16(li->metadata_size);
1200 memset(s, 0, ino->i_sb->s_blocksize - li->metadata_size);
1201 *s = cpu_to_le16(IBLK_INDIRECT);
1203 lai->depth = li->depth;
/* ---- TypeInodeFile ---- */
1208 struct fs_md *i = &li->md.fs;
1209 struct fs_metadata *l = &lai->metadata[0].fs;
1212 l->snapshot_usage_table = cpu_to_le16(i->usagetable);
1213 l->update_time = cpu_to_le64(encode_time(&ino->i_mtime));
1214 l->blocks_used = cpu_to_le64(i->cblocks_used);
1215 l->blocks_allowed = cpu_to_le64(i->blocks_allowed);
1216 l->creation_age = cpu_to_le64(i->creation_age);
1217 l->inodes_used = cpu_to_le32(i->inodes_used);
1218 l->quota_inodes[0] = cpu_to_le32(i->quota_inums[0]);
1219 l->quota_inodes[1] = cpu_to_le32(i->quota_inums[1]);
1220 l->quota_inodes[2] = cpu_to_le32(i->quota_inums[2]);
1221 nlen = lai->metadata_size - offsetof(struct la_inode,
1222 metadata[0].fs.name);
1223 memset(l->name, 0, nlen);
1224 if (i->name == NULL)
1226 else if (strlen(i->name) < nlen)
1227 nlen = strlen(i->name);
1228 memcpy(l->name, i->name, nlen);
/* ---- TypeInodeMap ---- */
1234 struct inodemap_md *m = &li->md.inodemap;
1235 struct inodemap_metadata *s = &lai->metadata[0].inodemap;
1236 s->size = cpu_to_le32(m->size);
1240 case TypeSegmentMap:
1242 struct su_md *m = &li->md.segmentusage;
1243 struct su_metadata *s = &lai->metadata[0].segmentusage;
1244 s->table_size = cpu_to_le32(m->table_size);
/* ---- TypeQuota ---- */
1250 struct quota_md *m = &li->md.quota;
1251 struct quota_metadata *s = &lai->metadata[0].quota;
1252 s->gracetime = cpu_to_le32(m->gracetime);
1253 s->graceunits = cpu_to_le32(m->graceunits);
1256 case TypeOrphanList:
1257 case TypeAccessTime:
1260 default: /* TypeBase or larger */
1262 struct file_md *i = &li->md.file;
1263 struct file_metadata *l = &lai->metadata[0].file;
1264 struct dir_metadata *d = &lai->metadata[0].dir;
1265 struct special_metadata *s = &lai->metadata[0].special;
1267 if (li->type < TypeBase)
1269 l->flags = cpu_to_le16(i->flags);
1270 l->mode = cpu_to_le16(ino->i_mode);
1271 l->userid = cpu_to_le32(ino->i_uid);
1272 l->groupid = cpu_to_le32(ino->i_gid);
1273 l->treeid = cpu_to_le32(i->treeid);
1274 l->creationtime = cpu_to_le64(i->creationtime);
1275 l->modifytime = cpu_to_le64(encode_time(&ino->i_mtime));
1276 l->ctime = cpu_to_le64(encode_time(&ino->i_ctime));
1277 l->accesstime = cpu_to_le64(encode_time(&ino->i_atime));
1278 /* FIXME write 0 to accesstime file */
1279 l->size = cpu_to_le64(ino->i_size);
1280 l->parent = cpu_to_le32(i->parent);
1281 l->linkcount = cpu_to_le32(ino->i_nlink);
/* dir: hash seed; special: device numbers */
1287 d->hash_seed = cpu_to_le32(i->seed);
1292 s->major = cpu_to_le32(MAJOR(ino->i_rdev));
1293 s->minor = cpu_to_le32(MINOR(ino->i_rdev));
1298 unmap_dblock(db, lai);
1301 /*-----------------------------------------------------------------------
1302 * Inode allocate map handling.
1303 * Inode 1 of each fileset is a bitmap of free inode numbers.
1304 * Whenever the file is extended in size, new bits are set to one. They
1305 * are then cleared when the inode is allocated. When a block becomes
1306 * full of zeros, we don't need to store it any more.
1308 * We don't clear the bit until we are committed to creating an inode
1309 * This means we cannot clear it straight away, so two different threads
1310 * might see the same inode number as being available. We have two
1311 * approaches to guard against this.
1312 * Firstly we have a 'current' pointer into the inodemap file and
1313 * increase that past the inode we return. This discourages multiple
1314 * hits but as the pointer would need to be rewound occasionally it
1315 * isn't a guarantee. The guarantee against multiple allocations is done
1316 * via a flag in the block representing an inode. This is set
1317 * while an inode is being allocated.
1320 /* inode number allocation has the prealloc/pin/commit/abort structure
1321 * so it can be committed effectively
/*
 * choose_free_inum - pick a free inode number from fileset 'sb' using
 * the inode-map file (inode 1; one bit per inum, 1 = free - see the
 * comment block above).  On success stores the inum in *inump and the
 * map's datablock in *bp (ref MKREF(cfi_map)); *restarted tracks
 * whether the scan already wrapped, in which case a new all-free map
 * block is appended (pinned via NewSpace, retried on -EAGAIN).
 * Inums below 16 are reserved.  Serialised by im->i_mutex
 * (I_MUTEX_QUOTA class).
 * NOTE(review): extract is incomplete - labels, retry gotos and some
 * error paths are elided.
 */
1325 choose_free_inum(struct fs *fs, struct super_block *sb, u32 *inump,
1326 struct datablock **bp, int *restarted)
1328 struct inode *im = lafs_iget(sb, 1, SYNC);
1330 struct datablock *b;
/* drop any map block the caller still holds from a previous attempt */
1336 struct inode *i = (*bp)->b.inode;
1337 putdref(*bp, MKREF(cfi_map));
1342 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
1344 bnum = LAFSI(im)->md.inodemap.thisblock;
1346 if (bnum == NoBlock ||
1347 LAFSI(im)->md.inodemap.nextbit >= (fs->blocksize<<3)) {
1348 if (bnum == NoBlock)
1349 bnum = LAFSI(im)->md.inodemap.size;
1351 if (bnum+1 < LAFSI(im)->md.inodemap.size)
1353 else if (!*restarted) {
1357 /* Need to add a new block to the file */
1358 bnum = LAFSI(im)->md.inodemap.size;
1359 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL,
1364 lafs_iolock_written(&b->b);
1365 set_bit(B_PinPending, &b->b.flags);
1366 lafs_iounlock_block(&b->b);
1368 lafs_checkpoint_lock(fs);
1369 err = lafs_pin_dblock(b, NewSpace);
1370 if (err == -EAGAIN) {
1371 lafs_checkpoint_unlock_wait(fs);
1377 buf = map_dblock(b);
1378 /* Set block to "all are free" */
1379 memset(buf, 0xff, fs->blocksize);
1380 unmap_dblock(b, buf);
1381 set_bit(B_Valid, &b->b.flags);
1382 LAFSI(im)->md.inodemap.size = bnum+1;
1383 lafs_dirty_inode(im);
1384 lafs_dirty_dblock(b);
1385 lafs_checkpoint_unlock(fs);
1386 putdref(b, MKREF(cfi_map));
1389 err = lafs_find_next(im, &bnum);
1395 LAFSI(im)->md.inodemap.nextbit = 0;
1396 LAFSI(im)->md.inodemap.thisblock = bnum;
1399 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(cfi_map));
1403 err = lafs_find_block(b, NOADOPT);
/* a hole (no phys addr, not valid) holds no free bits - skip it */
1406 if (b->b.physaddr == 0 && !test_bit(B_Valid, &b->b.flags)) {
1407 LAFSI(im)->md.inodemap.nextbit =
1408 (fs->blocksize<<3) + 1;
1409 putdref(b,MKREF(cfi_map));
1412 err = lafs_read_block(b);
1416 bit = LAFSI(im)->md.inodemap.nextbit;
1417 LAFSI(im)->md.inodemap.thisblock = bnum;
1418 buf = map_dblock(b);
1419 while (bnum == 0 && bit < 16) {
1420 /* Never return an inum below 16 - they are special */
1421 if (!generic_test_le_bit(bit, (unsigned long *)buf))
1422 generic___clear_le_bit(bit, (unsigned long *)buf);
1426 bit = generic_find_next_le_bit((unsigned long *)buf,
1427 fs->blocksize<<3, bit);
1428 unmap_dblock(b, buf);
1429 LAFSI(im)->md.inodemap.nextbit = bit+1;
1430 if (bit >= fs->blocksize<<3) {
1431 putdref(b,MKREF(cfi_map));
1434 mutex_unlock(&im->i_mutex);
/* inum = bit within block + block number * bits-per-block */
1436 *inump = bit + (bnum << (im->i_blkbits + 3));
1440 lafs_checkpoint_unlock(fs);
1442 putdref(b, MKREF(cfi_map));
1444 mutex_unlock(&im->i_mutex);
/* State shared by the inode_map_new_{prepare,pin,commit,abort} phases.
 * NOTE(review): inferred from usage below - ib appears to be the
 * datablock that will hold the new inode itself, mb the inodemap
 * bitmap block covering its number; confirm against the full source.
 */
1449 struct inode_map_new_info {
1450 struct datablock *ib, *mb;
/* Phase 1 of inode-number allocation: choose a candidate number and
 * claim it.  B_Claimed on the inode block is the actual guarantee
 * against two threads allocating the same inum (the bitmap bit is only
 * cleared at commit).  Both blocks get B_PinPending so they can be
 * pinned in a later phase.  Lines are elided in this view; the retry
 * path after a failed claim is not visible.
 */
1454 inode_map_new_prepare(struct fs *fs, int inum, struct super_block *sb,
1455 struct inode_map_new_info *imni)
1460 struct datablock *b;
1462 imni->ib = imni->mb = NULL;
1465 /* choose a possibly-free inode number */
1466 err = choose_free_inum(fs, sb, &choice,
1467 &imni->mb, &restarted);
1471 b = lafs_get_block(ino_from_sb(sb), choice, NULL, GFP_KERNEL,
/* Somebody else claimed this inum first - drop it (and presumably
 * retry; the loop structure is elided). */
1476 if (test_and_set_bit(B_Claimed, &b->b.flags)) {
1477 putdref(b, MKREF(cfi_ino));
1483 lafs_iolock_written(&imni->mb->b);
1484 set_bit(B_PinPending, &imni->mb->b.flags);
1485 lafs_iounlock_block(&imni->mb->b);
1487 set_bit(B_PinPending, &b->b.flags);
/* Phase 2: pin both the inodemap bitmap block and the new inode block
 * for the NewSpace phase.  The ?: chain keeps the first error, so the
 * second pin is skipped once the first has failed. */
1494 inode_map_new_pin(struct inode_map_new_info *imni)
1498 err = lafs_pin_dblock(imni->mb, NewSpace);
1499 err = err ?: lafs_pin_dblock(imni->ib, NewSpace);
/* Phase 3: commit the allocation.  Clear the inum's "free" bit in the
 * bitmap under the inodemap inode's i_mutex; if that leaves the block
 * with no free inums recorded at all, erase the block (punch a hole)
 * instead of dirtying it.  Drops the bitmap-block reference taken by
 * _prepare.  Lines are elided; branch bodies are partly invisible.
 */
1504 inode_map_new_commit(struct inode_map_new_info *imni)
1509 int blksize = imni->ib->b.inode->i_sb->s_blocksize;
/* Bit index of this inum within its bitmap block. */
1510 int bit = imni->ib->b.fileaddr & (blksize*8 - 1);
1512 struct inode *ino = imni->mb->b.inode;
1514 mutex_lock_nested(&ino->i_mutex, I_MUTEX_QUOTA);
1515 buf = map_dblock(imni->mb);
1516 generic___clear_le_bit(bit, buf);
/* Quick last-word check first, then a full scan for any set bit. */
1517 if (buf[blksize/sizeof(*buf)-1] == 0 &&
1518 generic_find_next_le_bit(buf, blksize*8, 0) == blksize*8)
1519 /* block is empty, punch a hole */
1522 unmap_dblock(imni->mb, buf);
1524 lafs_erase_dblock(imni->mb);
1526 lafs_dirty_dblock(imni->mb);
1528 putdref(imni->mb, MKREF(cfi_map));
1529 mutex_unlock(&ino->i_mutex);
1532 putdref(imni->ib, MKREF(cfi_ino));
/* Abort a prepared allocation: release the B_Claimed/B_PinPending
 * marks on the inode block, drop its orphan registration, and put the
 * references taken by _prepare.  Lines are elided in this view.
 */
1536 inode_map_new_abort(struct inode_map_new_info *imni)
1539 clear_bit(B_Claimed, &imni->ib->b.flags);
1540 clear_bit(B_PinPending, &imni->ib->b.flags);
1541 lafs_orphan_release(fs_from_inode(imni->ib->b.inode),
1544 putdref(imni->ib, MKREF(cfi_ino));
1546 struct inode *ino = imni->mb->b.inode;
1547 putdref(imni->mb, MKREF(cfi_map));
1553 lafs_new_inode(struct fs *fs, struct super_block *sb, struct inode *dir,
1554 int type, int inum, int mode, struct datablock **inodbp)
1556 /* allocate and instantiate a new inode. If inum is zero,
1557 * choose any number, otherwise we are creating a special inode
1558 * and have to use the given number.
 * (NOTE(review): the original comment said "non-zero", which
 * contradicted the rest of the sentence; confirm against callers.)
1559 * This creation is committed independently of any name that might
1560 * subsequently be given to the inode. So we register it as an
1561 * orphan so that it will be cleaned up if the name isn't
1562 * successfully created
1566 struct datablock *b;
1567 struct inode_map_new_info imni;
1568 struct update_handle ui;
1571 err = inode_map_new_prepare(fs, inum, sb, &imni);
1572 err = lafs_cluster_update_prepare(&ui, fs, sizeof(struct la_inode))
/* Register as orphan BEFORE committing, so a crash cannot leave an
 * allocated-but-unnamed inode behind. */
1575 err = lafs_make_orphan(fs, imni.ib);
1579 lafs_checkpoint_lock(fs);
1581 err = inode_map_new_pin(&imni);
/* -EAGAIN: wait for the checkpoint and (presumably) retry -
 * the retry path is elided from this view. */
1583 if (err == -EAGAIN) {
1584 lafs_checkpoint_unlock_wait(fs);
1590 b = getdref(imni.ib, MKREF(inode_new));
1592 lafs_iolock_block(&b->b); /* make sure we don't race with the cleaner
1593 * and zero this inode while trying to load it
1595 lafs_inode_init(b, type, mode, dir);
1596 lafs_iounlock_block(&b->b);
1598 inode_map_new_commit(&imni);
1599 ino = lafs_iget(sb, b->b.fileaddr, SYNC);
1601 lafs_cluster_update_abort(&ui);
1604 lafs_cluster_update_commit(&ui, b, 0,
1605 LAFSI(ino)->metadata_size);
/* Sanity: the in-core inode and its dblock must point at each other. */
1606 LAFS_BUG(LAFSI(ino)->dblock != b, &b->b);
1607 LAFS_BUG(b->my_inode != ino, &b->b);
1608 lafs_checkpoint_unlock(fs);
1613 putdref(b, MKREF(inode_new));
/* Error path: unwind in reverse order of the prepare steps. */
1617 lafs_checkpoint_unlock(fs);
1620 inode_map_new_abort(&imni);
1621 lafs_cluster_update_abort(&ui);
1622 dprintk("After abort %d: %s\n", err, strblk(&imni.ib->b));
1623 return ERR_PTR(err);
/* Return inum to the free pool by setting its bit in the inodemap
 * bitmap (a set bit means "free" - the map is initialised to 0xff).
 * Pins the bitmap block as ReleaseSpace since this frees space.
 * Lines are elided in this view; error/retry paths are partly
 * invisible.
 */
1626 static int inode_map_free(struct fs *fs, struct super_block *sb, u32 inum)
1628 struct inode *im = lafs_iget(sb, 1, SYNC);
1631 struct datablock *b;
1635 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Split inum into bitmap block number and bit within that block. */
1637 bnum = inum >> (3 + sb->s_blocksize_bits);
1638 bit = inum - (bnum << (3 + sb->s_blocksize_bits));
1639 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1641 mutex_unlock(&im->i_mutex);
1645 err = lafs_read_block(b);
1647 putdref(b, MKREF(inode_map_free));
1648 mutex_unlock(&im->i_mutex);
1652 lafs_iolock_written(&b->b);
1653 set_bit(B_PinPending, &b->b.flags);
1654 lafs_iounlock_block(&b->b);
1656 lafs_checkpoint_lock(fs);
1657 err = lafs_pin_dblock(b, ReleaseSpace);
/* -EAGAIN: wait out the checkpoint, then (presumably) retry. */
1658 if (err == -EAGAIN) {
1659 lafs_checkpoint_unlock_wait(fs);
1663 buf = map_dblock(b);
1664 generic___set_le_bit(bit, buf);
1665 unmap_dblock(b, buf);
1666 lafs_dirty_dblock(b);
1667 putdref(b, MKREF(inode_map_free));
1668 lafs_checkpoint_unlock(fs);
1669 mutex_unlock(&im->i_mutex);
1674 int lafs_inode_inuse(struct fs *fs, struct super_block *sb, u32 inum)
1676 /* This is used during roll-forward to register a newly created
1677 * inode in the inode map
 * (i.e. clear its "free" bit, extending the map file if needed).
1679 struct inode *im = lafs_iget(sb, 1, SYNC);
1682 struct datablock *b;
1686 mutex_lock_nested(&im->i_mutex, I_MUTEX_QUOTA);
/* Split inum into bitmap block number and bit within that block. */
1688 bnum = inum >> (3 + sb->s_blocksize_bits);
1689 bit = inum - (bnum << (3 + sb->s_blocksize_bits));
/* Strictly greater: bnum == size is allowed, as that is the "append
 * one new block" case handled below. */
1690 if (bnum > LAFSI(im)->md.inodemap.size) {
1691 /* inum too unbelievably big */
1692 mutex_unlock(&im->i_mutex);
1696 b = lafs_get_block(im, bnum, NULL, GFP_KERNEL, MKREF(inode_map_free));
1698 mutex_unlock(&im->i_mutex);
1703 err = lafs_read_block(b);
1705 putdref(b, MKREF(inode_map_free));
1706 mutex_unlock(&im->i_mutex);
1711 lafs_iolock_written(&b->b);
1712 set_bit(B_PinPending, &b->b.flags);
1713 lafs_iounlock_block(&b->b);
1715 lafs_checkpoint_lock(fs);
1716 err = lafs_pin_dblock(b, CleanSpace);
/* -EAGAIN: wait out the checkpoint, then (presumably) retry. */
1717 if (err == -EAGAIN) {
1718 lafs_checkpoint_unlock_wait(fs);
1722 buf = map_dblock(b);
1723 if (bnum == LAFSI(im)->md.inodemap.size) {
1724 /* need to add a new block to the file */
1725 memset(buf, 0xff, fs->blocksize)
1726 LAFSI(im)->md.inodemap.size = bnum + 1;
1727 lafs_dirty_inode(im);
/* Clear the bit: this inum is now in use. */
1729 generic___clear_le_bit(bit, buf);
1730 unmap_dblock(b, buf);
1731 lafs_dirty_dblock(b);
1732 putdref(b, MKREF(inode_map_free));
1733 lafs_checkpoint_unlock(fs);
1734 mutex_unlock(&im->i_mutex);
/* VFS ->setattr: validate the change, pin the inode's datablock into
 * the current checkpoint phase, then apply the attributes via
 * inode_setattr.  Lines are elided in this view; the early-error
 * returns are partly invisible.
 */
1741 int lafs_setattr(struct dentry *dentry, struct iattr *attr)
1744 struct inode *ino = dentry->d_inode;
1745 struct fs *fs = fs_from_inode(ino);
1746 struct datablock *db;
1748 err = inode_change_ok(ino, attr);
1749 db = lafs_inode_dblock(ino, SYNC, MKREF(setattr));
1755 /* We don't need iolock_written here as we don't
1756 * actually change the inode block yet
1758 lafs_iolock_block(&db->b);
1759 set_bit(B_PinPending, &db->b.flags);
1760 lafs_iounlock_block(&db->b);
1762 /* FIXME quota stuff */
1765 lafs_checkpoint_lock(fs);
1766 err = lafs_pin_dblock(db, ReleaseSpace);
/* -EAGAIN: wait for the checkpoint, then (presumably) retry. */
1767 if (err == -EAGAIN) {
1768 lafs_checkpoint_unlock_wait(fs);
1771 /* inode_setattr calls lafs_dirty_inode, which sets
1772 * I_Dirty so the dblock will get updated.
1774 err = err ?: inode_setattr(ino, attr);
1776 lafs_dirty_dblock(db);
1777 clear_bit(B_PinPending, &db->b.flags);
1778 putdref(db, MKREF(setattr));
1779 lafs_checkpoint_unlock(fs);
1784 void lafs_truncate(struct inode *ino)
1786 /* Want to truncate this file.
1787 * i_size has already been changed, and the address space
1788 * has been cleaned up.
1789 * So just start the background truncate
 * (performed incrementally through the orphan machinery).
1791 struct fs *fs = fs_from_inode(ino);
1792 struct datablock *db = lafs_inode_dblock(ino, SYNC, MKREF(trunc));
/* First block wholly beyond the new i_size (round up). */
1799 trunc_block = ((i_size_read(ino) + fs->blocksize - 1)
1800 >> fs->blocksize_bits);
1801 /* We hold i_mutex, so regular orphan processing cannot
1802 * continue - we have to push it forward ourselves.
/* If an earlier truncate is still running and has not yet passed
 * trunc_block, drive it forward synchronously until it has. */
1804 while (test_bit(I_Trunc, &LAFSI(ino)->iflags) &&
1805 LAFSI(ino)->trunc_next < trunc_block) {
1806 prepare_to_wait(&fs->async_complete, &wq,
1807 TASK_UNINTERRUPTIBLE);
1808 lafs_inode_handle_orphan(db);
1809 if (test_bit(B_Orphan, &db->b.flags))
1812 finish_wait(&fs->async_complete, &wq);
1814 /* There is nothing we can do about errors here. The
1815 * most likely are ENOMEM which itself is very unlikely.
1816 * If this doesn't get registered as an orphan .... maybe
1817 * it will have to wait until something else truncates it.
1819 lafs_make_orphan(fs, db);
1821 if (!test_and_set_bit(I_Trunc, &LAFSI(ino)->iflags))
/* Truncate-to-zero bumps trunc_gen; NOTE(review): the purpose of
 * trunc_gen is not visible here - confirm against the full source. */
1823 if (trunc_block == 0)
1824 LAFSI(ino)->trunc_gen++;
1825 LAFSI(ino)->trunc_next = trunc_block;
1826 putdref(db, MKREF(trunc));
1829 const struct inode_operations lafs_special_ino_operations = {
1830 .setattr = lafs_setattr,
1831 .truncate = lafs_truncate,