]> git.neil.brown.name Git - LaFS.git/blob - dir.c
Mount: need to return the superblock with s_umount held.
[LaFS.git] / dir.c
1
2 /*
3  * fs/lafs/dir.c
4  * Copyright (C) 2005-2009
5  * Neil Brown <neilb@suse.de>
6  * Released under the GPL, version 2
7  *
8  * Directory operations
9  */
10
11 #include        "lafs.h"
12 #include        <linux/slab.h>
13
14 #define S_SHIFT 12
15
16 static unsigned char file_type[1 + (S_IFMT >> S_SHIFT)] = {
17         [S_IFREG >> S_SHIFT]    = DT_REG,
18         [S_IFDIR >> S_SHIFT]    = DT_DIR,
19         [S_IFCHR >> S_SHIFT]    = DT_CHR,
20         [S_IFBLK >> S_SHIFT]    = DT_BLK,
21         [S_IFIFO >> S_SHIFT]    = DT_FIFO,
22         [S_IFSOCK >> S_SHIFT]   = DT_SOCK,
23         [S_IFLNK >> S_SHIFT]    = DT_LNK,
24 };
25
26 static inline int mode_to_dt(int mode)
27 {
28         return file_type[(mode & S_IFMT) >> S_SHIFT];
29 }
30
31 /*****************************************************************************
32  * Directory lookup
33  * Most of the work is done in dir_lookup_blk which returns the block
34  * that should hold the entry.
35  * dir_lookup takes that result and finds an inode number if possible.
36  */
37 static struct datablock *
38 dir_lookup_blk(struct inode *dir, const char *name, int nlen,
39                u8 *pp, int forcreate, u32 *hashp, REFARG)
40 {
41         struct lafs_inode *lai = LAFSI(dir);
42         u32 seed = lai->md.file.seed;
43         struct datablock *b, *cb = NULL;
44         loff_t bn;
45         u32 hash;
46         int err;
47         int bits = dir->i_blkbits-8;
48         if (nlen == 0)
49                 nlen = strlen(name);
50
51         hash = lafs_hash_name(seed, nlen, name);
52         dprintk("name= %s hash=%lu\n", name, (unsigned long)bn);
53
54         while (1) {
55                 char *buf;
56                 bn = hash+1;
57                 dprintk("bn=%lu\n", (unsigned long)bn);
58                 if (lafs_find_next(dir, &bn) == 0)
59                         bn = 0;
60                 dprintk("now=%lu\n", (unsigned long)bn);
61
62                 err = -ENOMEM;
63                 b = lafs_get_block(dir, bn, NULL, GFP_KERNEL, REF);
64                 if (!b)
65                         break;
66                 err = lafs_read_block(b);
67                 if (err)
68                         break;
69                 buf = map_dblock(b);
70                 if (lafs_trace) {
71                         void lafs_dir_print(char *buf, int psz);
72                         lafs_dir_print(buf, bits);
73                 }
74                 for ( ; hash < (bn ? bn : MaxDirHash) ; hash++) {
75                         u8 piece;
76                         struct dir_ent de;
77                         int rv = lafs_dir_find(buf, bits, seed, hash, &piece);
78
79                         if (rv == 0) {
80                                 unmap_dblock(b, buf);
81                                 if (forcreate) {
82                                         if (cb) {
83                                                 putdref(b, REF);
84                                                 b = cb;
85                                         } else
86                                                 *hashp = hash;
87                                         return b;
88                                 }
89                                 putdref(b, REF);
90                                 return ERR_PTR(-ENOENT);
91                         }
92                         lafs_dir_extract(buf, bits, &de, piece, NULL);
93                         if (!de.target) {
94                                 if (forcreate && !cb) {
95                                         cb = getdref(b, REF);
96                                         *hashp = hash;
97                                 }
98                                 continue;
99                         }
100                         if (de.nlen != nlen)
101                                 continue;
102                         if (strncmp(de.name, name, de.nlen) != 0)
103                                 continue;
104                         unmap_dblock(b, buf);
105                         if (forcreate) {
106                                 putdref(b, REF);
107                                 if (cb)
108                                         putdref(cb, REF);
109                                 return ERR_PTR(-EEXIST);
110                         }
111                         *pp = piece;
112                         if (hashp)
113                                 *hashp = hash;
114                         return b;
115                 }
116                 unmap_dblock(b, buf);
117                 putdref(b, REF);
118         }
119
120         putdref(b, REF);
121         putdref(cb, REF);
122         return ERR_PTR(err);
123 }
124
125 static int
126 dir_lookup(struct inode *dir, const char *name, int nlen,
127            u32 *inum)
128 {
129         /*
130          * return
131          *    -ve on error
132          *     0 on not found
133          *  type on found
134          */
135         struct datablock *b;
136         char *buf;
137         struct dir_ent de;
138         int bits = dir->i_blkbits-8;
139         u8 piece;
140
141         b = dir_lookup_blk(dir, name, nlen, &piece, 0, NULL, MKREF(dir_lookup));
142         if (IS_ERR(b))
143                 return PTR_ERR(b);
144         buf = map_dblock(b);
145         *inum = lafs_dir_extract(buf, bits, &de, piece, NULL)->target;
146         unmap_dblock(b, buf);
147         putdref(b, MKREF(dir_lookup));
148         return 1;
149 }
150
151 /*****************************************************************************
152  *
153  * Directory modification routines.
154  * We have sets of prepare, pin, commit, abort.
155  * 'prepare' happens outside of a phase-lock and can allocate
156  * and reserve blocks, and is free to fail.  Everything it does
157  * should be revertable - or atomically committed.
158  * 'pin' happens inside a phase-lock and should pin any blocks
159  * that might need to be dirtied.  It only fails if the pin fails.
160  * 'commit' is called inside the phase-lock if prepare and pin succeed.
161  * It must clean up anything that was set up by 'prepare'.
162  * 'abort' is called if anything fails.  It too should clean up anything
163  * that prepare sets up.
164  * If 'prepare' is called, then either 'commit' or 'abort' will be called
165  * so failure in prepare can leave cleanup to 'abort'.
166  *
167  * The whole process uses a dirop_handle to store various aspects
168  * of state that might need to be unwound or committed.
169  * A compound operation such as rename may include several
170  * simple operations such as delete + create.  In that case there
171  * will be a separate dirop_handle for each simple operation.
172  * There will only be one create
173  */
174 struct dirop_handle { /* state carried from 'prepare' through 'pin' to 'commit' or 'abort' */
175         struct datablock *dirent_block; /* block found by dir_lookup_blk(); may hold an ERR_PTR after a failed prepare */
176         struct datablock *new; /* extra block obtained when a create has to split; NULL otherwise */
177         char    *temp; /* kmalloc'd image copied into dirent_block at commit after a split */
178         u32     hash; /* hash of the entry (delete) or of the slot to fill (create) */
179         u8      index; /* piece number of the entry, as returned by the lookup */
180         int     chainoffset; /* create only: slot hash minus the hash of the name */
181 };
182
183 /*............................................................................
184  * Creating an entry in a directory.
185  * This is split into pre_create and commit_create
186  * We already know that the name doesn't exist so a lookup will fail,
187  * but will find the right place in the tree.
188  * pre_create allocates blocks as needed and stores info in the dirop_handle.
189  * commit_create finalises the create and cannot fail.
190  */
191
192 static int dir_create_prepare(struct fs *fs, struct inode *dir,
193                               const char *name, int nlen,
194                               u32 inum, int type,
195                               struct dirop_handle *doh)
196 {
197         /*
198          * We need one or 2 blocks
199          * - a dirent block with space to receive the new entry.
200          * - A free block into which the block index may split
201          * These will be pinned and allocated credits before we commit.
202          *
203          * For a single-block which still has room,
204          * we only need the first of those.
205          *
206          */
207         int blocksize = fs->blocksize;
208         struct datablock *dirblk;
209         char *buf, *n1, *n2;
210         int bits = dir->i_blkbits-8;
211         u8 piece;
212         struct lafs_inode *lai = LAFSI(dir);
213         u32 seed = lai->md.file.seed;
214         struct dirheader *dh;
215         int chainoffset;
216         u32 hash, newhash;
217         int rv;
218
219         doh->temp = NULL;
220         doh->new = NULL;
221
222         doh->dirent_block =
223                 dirblk = dir_lookup_blk(dir, name, nlen, &piece, 1, &hash,
224                                         MKREF(dir_blk));
225
226         if (IS_ERR(dirblk))
227                 return PTR_ERR(dirblk);
228
229         lafs_iolock_written(&dirblk->b);
230         set_bit(B_PinPending, &dirblk->b.flags);
231         /* i_mutex protect us now, so don't need to maintain the lock */
232         lafs_iounlock_block(&dirblk->b);
233
234         chainoffset = hash - lafs_hash_name(seed, nlen, name);
235         buf = map_dblock(dirblk);
236         rv = lafs_dir_add_ent(buf, bits, name, nlen, 0, DT_TEST,
237                               seed, hash, chainoffset);
238         unmap_dblock(dirblk, buf);
239         if (rv < 0)
240                 return -EEXIST;
241         if (rv == 1) {
242                 doh->hash = hash;
243                 doh->chainoffset = chainoffset;
244                 return 0;
245         }
246         /* Doesn't fit, try repacking */
247
248         buf = map_dblock(dirblk);
249         dh = (struct dirheader *)buf;
250         if (dh->freepieces >= space_needed(nlen, chainoffset, bits)) {
251                 char *tmp;
252                 unmap_dblock(dirblk, buf);
253                 tmp = kmalloc(256<<bits, GFP_KERNEL);
254                 if (tmp) {
255                         buf = map_dblock(dirblk);
256                         lafs_dir_repack(buf, bits, tmp, seed, 0);
257                         if (lafs_dir_add_ent(tmp, bits, name, nlen, 0, DT_TEST,
258                                              seed, hash, chainoffset)) {
259                                 memcpy(buf, tmp, blocksize);
260                                 unmap_dblock(dirblk, buf);
261                                 kfree(tmp);
262                                 doh->hash = hash;
263                                 doh->chainoffset = chainoffset;
264                                 return 0;
265                         }
266                         kfree(tmp);
267                 } else {
268                         unmap_dblock(dirblk, buf);
269                         return -ENOMEM;
270                 }
271         }
272         unmap_dblock(dirblk, buf);
273
274         /* Really doesn't fit, need to split.
275          * We have to perform the split now so that we can choose a new
276          * index and pin that block.
277          */
278         n1 = kmalloc(blocksize, GFP_KERNEL);
279         n2 = kmalloc(blocksize, GFP_KERNEL);
280         if (!n1 || !n2) {
281                 kfree(n1); kfree(n2);
282                 return -ENOMEM;
283         }
284         doh->new = lafs_get_block(dir, newhash+1, NULL, GFP_KERNEL,
285                                   MKREF(dir_new));
286         if (doh->new == NULL) {
287                 kfree(n1); kfree(n2);
288                 return -ENOMEM;
289         }
290         buf = map_dblock(dirblk);
291         lafs_dir_split(buf, bits, n1, n2, name, inum, type,
292                        &newhash, seed, hash, chainoffset);
293         unmap_dblock(dirblk, buf);
294         buf = map_dblock(doh->new);
295         memcpy(buf, n1, blocksize);
296         unmap_dblock(doh->new, buf);
297         set_bit(B_Valid, &doh->new->b.flags);
298         set_bit(B_PinPending, &doh->new->b.flags);
299         kfree(n1);
300         doh->temp = n2;
301         return 0;
302 }
303
304 static void
305 dir_create_commit(struct dirop_handle *doh,
306                   struct fs *fs, struct inode *dir,
307                   const char *name, int nlen, u32 target, int type)
308 {
309         /* We are committed to creating this entry.
310          * Everything has been allocated and pinned.
311          * All we do is:
312          * - add name to doh->dirent_block.
313          * - if doh->index == NULL, done.
314          * - possibly update indexslot to have address of doh->dirent_block.
315          * - if doh->new, split doh->index into doh->new adding doh->previndex
316          * - else add doh->previndex into doh->index
317          */
318         char *buf;
319         int bits = fs->blocksize_bits - 8;
320         int blocksize = fs->blocksize;
321         struct lafs_inode *lai = LAFSI(dir);
322         u32 seed = lai->md.file.seed;
323
324         buf = map_dblock(doh->dirent_block);
325
326         if (doh->new) {
327                 /* We did a split, and have the block ready to go */
328                 memcpy(buf, doh->temp, blocksize);
329                 kfree(doh->temp);
330                 doh->temp = NULL;
331                 lafs_dirty_dblock(doh->new);
332                 if ((((loff_t)doh->new->b.fileaddr+1) << dir->i_blkbits)
333                     > dir->i_size) {
334                         i_size_write(dir, (((loff_t)doh->new->b.fileaddr+1)
335                                            << dir->i_blkbits));
336                         lafs_dirty_inode(dir);
337                 }
338                 clear_bit(B_PinPending, &doh->new->b.flags);
339                 putdref(doh->new, MKREF(dir_new));
340         } else
341                 lafs_dir_add_ent(buf, bits, name, nlen, target,
342                                  type, seed, doh->hash, doh->chainoffset);
343         lafs_dirty_dblock(doh->dirent_block);
344         if (dir->i_size <= blocksize) {
345                 /* Make dir fit in inode if possible */
346                 i_size_write(dir, lafs_dir_blk_size(buf, bits));
347                 lafs_dirty_inode(dir);
348         }
349         clear_bit(B_PinPending, &doh->dirent_block->b.flags);
350         unmap_dblock(doh->dirent_block, buf);
351         putdref(doh->dirent_block, MKREF(dir_blk));
352 }
353
354 static int
355 dir_create_pin(struct dirop_handle *doh)
356 {
357         int err;
358         err = lafs_pin_dblock(doh->dirent_block, NewSpace);
359         if (err || doh->new == NULL)
360                 return err;
361         err = lafs_pin_dblock(doh->new, NewSpace);
362         return err;
363 }
364
365 static void
366 dir_create_abort(struct dirop_handle *doh)
367 {
368         kfree(doh->temp);
369         if (!IS_ERR(doh->dirent_block) && doh->dirent_block) {
370                 clear_bit(B_PinPending, &doh->dirent_block->b.flags);
371                 putdref(doh->dirent_block, MKREF(dir_blk));
372         }
373         if (!IS_ERR(doh->new) && doh->new) {
374                 clear_bit(B_PinPending, &doh->new->b.flags);
375                 putdref(doh->new, MKREF(dir_new));
376         }
377 }
378
379 /*---------------------------------------------------------------
380  * Delete directory entry.
381  * Deleting involves invalidating the entry in the dirent block,
382  * and then removing any deleted entries that are not in a chain.
383  * If we cannot be sure, we schedule orphan processing to do
384  * the fine details of chain clearing.
385  */
386 static int
387 dir_delete_prepare(struct fs *fs, struct inode *dir,
388                const char *name, int nlen, struct dirop_handle *doh)
389 {
390         struct datablock *dirblk;
391         int orphan = 0;
392
393         doh->dirent_block =
394                 dirblk = dir_lookup_blk(dir, name, nlen, &doh->index,
395                                         0, &doh->hash, MKREF(dir_blk));
396         if (IS_ERR(dirblk) && PTR_ERR(dirblk) == -ENOENT) {
397                 lafs_trace = 1;
398                 dirblk = dir_lookup_blk(dir, name, nlen, &doh->index,
399                                         0, &doh->hash, MKREF(dir_blk));
400                 if (!IS_ERR(dirblk))
401                         printk("Weird: %s\n", strblk(&dirblk->b));
402                 lafs_trace = 0;
403         }
404
405         if (IS_ERR(dirblk))
406                 return PTR_ERR(dirblk);
407         lafs_iolock_written(&dirblk->b);
408         set_bit(B_PinPending, &dirblk->b.flags);
409         /* i_mutex protect us now, so don't need to maintain the lock */
410         lafs_iounlock_block(&dirblk->b);
411
412         /* Only make this block an orphan if there is a real
413          * possibilitiy.
414          * i.e. one of
415          *    We found the last possible entry
416          *    We found the first entry
417          *    We found the only entry (in which case we found the first)
418          *    First entry is deleted
419          */
420         if (((doh->hash+1) & MaxDirHash) == doh->dirent_block->b.fileaddr)
421                 /* Last possible entry is being remove */
422                 orphan=1;
423         if (!orphan) {
424                 u32 seed = LAFSI(dir)->md.file.seed;
425                 u8 firstpiece = 0;
426                 struct dir_ent de;
427                 char bits = dir->i_blkbits - 8;
428                 char *buf = map_dblock(dirblk);
429                 lafs_dir_find(buf, bits, seed, 0, &firstpiece);
430                 if (doh->index == firstpiece ||
431                     lafs_dir_extract(buf, bits, &de,
432                                      firstpiece, NULL)->target == 0)
433                         orphan = 1;
434                 unmap_dblock(dirblk, buf);
435         }
436         if (orphan)
437                 return lafs_make_orphan(fs, doh->dirent_block);
438         return 0;
439 }
440
441 static void
442 dir_delete_commit(struct dirop_handle *doh,
443                   struct fs *fs, struct inode *dir,
444                   const char *name, int nlen)
445 {
446         char *buf = map_dblock(doh->dirent_block);
447         char bits = dir->i_blkbits - 8;
448         struct dir_ent de;
449         u32 seed = LAFSI(dir)->md.file.seed;
450         u8 ignore;
451
452         /* First mark the entry as deleted, then consider removing it*/
453         de.target = 0;
454         de.type = 0;
455         lafs_dir_set_target(buf, bits, &de, doh->index);
456
457         /* If 'hash+1' is not in this block, make me an orphan
458          *   (as we cannot check the chain)
459          * If it is and exists, do nothing (could be in active chain).
460          * If it doesn't exist:
461          *    Remove this entry and any earlier deleted entries in a chain,
462          *    but don't remove the first entry in the block.
463          *    If we end up leaving that first entry, make me an orphan so
464          *    the we can check if the chain continues in a previous block.
465          */
466         if (((doh->hash+1) & MaxDirHash) == doh->dirent_block->b.fileaddr)
467                 unmap_dblock(doh->dirent_block, buf);
468         else if (lafs_dir_find(buf, bits, seed, doh->hash+1, &ignore) == 0) {
469                 /* This is the end of a chain, clean up */
470                 u8 firstpiece;
471                 u8 piece;
472                 u32 hash;
473
474                 lafs_dir_find(buf, bits, seed, 0, &firstpiece);
475                 hash = doh->hash; piece = doh->index;
476                 do {
477                         if (piece == firstpiece)
478                                 break;
479                         lafs_dir_del_ent(buf, bits, seed, hash);
480                         BUG_ON(hash == 0 || doh->hash - hash > 256);
481                         hash--;
482                 } while (lafs_dir_find(buf, bits, seed, hash, &piece) &&
483                          lafs_dir_extract(buf, bits, &de, piece,
484                                           NULL)->target == 0);
485
486                 unmap_dblock(doh->dirent_block, buf);
487         } else
488                 unmap_dblock(doh->dirent_block, buf);
489
490         lafs_dirty_dblock(doh->dirent_block);
491         clear_bit(B_PinPending, &doh->dirent_block->b.flags);
492         putdref(doh->dirent_block, MKREF(dir_blk));
493 }
494
495 static int
496 dir_delete_pin(struct dirop_handle *doh)
497 {
498         int err;
499         err = lafs_pin_dblock(doh->dirent_block, ReleaseSpace);
500         if (err)
501                 return err;
502         return 0;
503 }
504
505 static void
506 dir_delete_abort(struct dirop_handle *doh)
507 {
508         if (doh->dirent_block &&
509             !IS_ERR(doh->dirent_block)) {
510                 clear_bit(B_PinPending, &doh->dirent_block->b.flags);
511                 putdref(doh->dirent_block, MKREF(dir_blk));
512         }
513 }
514
515 /*--------------------------------------------------------------
516  * Update directory entry
517  * This is used for rename when the target already exists
518  * Rather than delete+create it becomes delete+update
519  * This is even similar to delete except that we don't bother
520  * with orphans.
521  */
522 static int
523 dir_update_prepare(struct fs *fs, struct inode *dir,
524                    const char *name, int nlen, struct dirop_handle *doh)
525 {
526         struct datablock *dirblk;
527
528         doh->dirent_block =
529                 dirblk = dir_lookup_blk(dir, name, nlen, &doh->index,
530                                         0, NULL, MKREF(dir_blk));
531         if (IS_ERR(dirblk))
532                 return PTR_ERR(dirblk);
533         lafs_iolock_written(&dirblk->b);
534         set_bit(B_PinPending, &dirblk->b.flags);
535         /* i_mutex protect us now, so don't need to maintain the lock */
536         lafs_iounlock_block(&dirblk->b);
537         return 0;
538 }
539
540 static void
541 dir_update_commit(struct fs *fs, u32 target, int type,
542                   struct dirop_handle *doh)
543 {
544         char *buf = map_dblock(doh->dirent_block);
545         int bits = doh->dirent_block->b.inode->i_blkbits - 8;
546         struct dir_ent de;
547
548         de.target = target;
549         de.type = type;
550         lafs_dir_set_target(buf, bits, &de, doh->index);
551         unmap_dblock(doh->dirent_block, buf);
552         lafs_dirty_dblock(doh->dirent_block);
553         clear_bit(B_PinPending, &doh->dirent_block->b.flags);
554         putdref(doh->dirent_block, MKREF(dir_blk));
555 }
556
557 static int
558 dir_update_pin(struct dirop_handle *doh)
559 {
560         return lafs_pin_dblock(doh->dirent_block, ReleaseSpace);
561 }
562
563 static void
564 dir_update_abort(struct dirop_handle *doh)
565 {
566         if (doh->dirent_block &&
567             !IS_ERR(doh->dirent_block)) {
568                 clear_bit(B_PinPending, &doh->dirent_block->b.flags);
569                 putdref(doh->dirent_block, MKREF(dir_blk));
570         }
571 }
572
573 /*------------------------------------------------------------
574  * Directory operations needs to be logged as transactions.
575  * The transaction is formed from the name preceded by 4 bytes
576  * for the inode number.
577  * A handle to allow matching of related updates is stored in the block number.
578  * The 'type' of transaction is recorded in the offset
579  */
580
581 static int dir_log_prepare(struct update_handle *uh,
582                            struct fs *fs,
583                            struct qstr *name)
584 {
585         return lafs_cluster_update_prepare(uh, fs, name->len+4);
586 }
587
588 static void dir_log_commit(struct update_handle *uh,
589                            struct fs *fs, struct inode *dir,
590                            struct qstr *name, u32 target,
591                            int operation, u32 *handle)
592 {
593         char mb[4];
594         static u32 hancnt;
595         u32 han = 0;
596
597         switch (operation) {
598         case DIROP_LINK:
599         case DIROP_UNLINK:
600                 han = 0;
601                 break;
602         case DIROP_REN_SOURCE:
603                 while (++hancnt == 0)
604                         ;
605                 han = hancnt;
606                 *handle = han;
607                 break;
608         case DIROP_REN_TARGET:
609                 han = *handle;
610                 break;
611         default:
612                 BUG();
613         }
614
615         *(u32 *)mb = cpu_to_le32(target);
616         lafs_cluster_update_commit_buf(uh, fs, dir, han, operation,
617                                        4+name->len, mb,
618                                        name->len, name->name);
619 }
620
621 int
622 lafs_dir_roll_mini(struct inode *dir, int handle, int dirop,
623                    u32 inum, char *name, int len)
624 {
625         int err = 0;
626         struct dirop_handle doh, old_doh;
627         struct datablock *inodb = NULL, *olddb = NULL;
628         struct inode *inode = NULL;
629         struct rename_roll *rr = NULL, **rrp;
630         struct fs *fs = fs_from_inode(dir);
631         int last;
632
633         if (inum)
634                 inode = lafs_iget(LAFSI(dir)->filesys, inum, SYNC);
635         if (IS_ERR(inode))
636                 return PTR_ERR(inode);
637         if (!inode && dirop != DIROP_REN_TARGET)
638                 return -EINVAL;
639
640         switch (dirop) {
641         default:
642                 err = -EINVAL;
643                 break;
644         case DIROP_LINK:
645                 /* name doesn't exist - we create it. */
646                 err = dir_create_prepare(fs, dir, name, len,
647                                          inum, mode_to_dt(inode->i_mode), &doh);
648                 inodb = lafs_inode_dblock(dir, SYNC, MKREF(roll_dir));
649                 if (IS_ERR(inodb))
650                         err = PTR_ERR(inodb);
651
652                 err = err ?: dir_create_pin(&doh);
653                 err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
654                 if (err < 0) {
655                         dir_create_abort(&doh);
656                         break;
657                 }
658                 inode_inc_link_count(inode);
659                 lafs_inode_checkpin(inode);
660                 lafs_dirty_dblock(inodb);
661                 clear_bit(B_PinPending, &inodb->b.flags);
662                 dir_create_commit(&doh, fs, dir, name, len,
663                                   inum, mode_to_dt(inode->i_mode));
664                 err = 0;
665                 break;
666
667         case DIROP_UNLINK:
668                 /* Name exists, we need to remove it */
669                 last = (inode->i_nlink == 1);
670                 err = dir_delete_prepare(fs, dir, name, len, &doh);
671                 inodb = lafs_inode_dblock(inode, SYNC, MKREF(roll_dir));
672                 if (IS_ERR(inode))
673                         err = PTR_ERR(inodb);
674                 if (last && !err)
675                         err = lafs_make_orphan(fs, inodb);
676                 if (err) {
677                         dir_delete_abort(&doh);
678                         break;
679                 }
680                 lafs_iolock_block(&inodb->b);
681                 set_bit(B_PinPending, &inodb->b.flags);
682                 lafs_iounlock_block(&inodb->b);
683                 err = dir_delete_pin(&doh);
684                 err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
685                 if (err < 0) {
686                         dir_delete_abort(&doh);
687                         break;
688                 }
689                 inode_dec_link_count(inode);
690                 dir_delete_commit(&doh, fs, dir, name, len);
691                 lafs_inode_checkpin(inode);
692                 lafs_dirty_dblock(inodb);
693                 clear_bit(B_PinPending, &inodb->b.flags);
694                 err = 0;
695                 break;
696
697         case DIROP_REN_SOURCE:
698                 rr = kmalloc(sizeof(*rr) + len, GFP_KERNEL);
699                 if (!rr) {
700                         err = -ENOMEM;
701                         break;
702                 }
703                 rr->next = fs->pending_renames;
704                 rr->key = handle;
705                 rr->dir = dir; igrab(dir);
706                 rr->inode = inode; igrab(inode);
707                 rr->nlen = len;
708                 strncpy(rr->name, name, len);
709                 rr->name[len] = 0;
710                 fs->pending_renames = rr;
711                 rr = NULL;
712                 break;
713
714         case DIROP_REN_TARGET:
715                 rrp = &fs->pending_renames;
716                 while (*rrp) {
717                         rr = *rrp;
718                         if (rr->key == handle)
719                                 break;
720                         rrp = &rr->next;
721                 }
722                 if (!*rrp) {
723                         rr = NULL;
724                         err = -EINVAL;
725                         break;
726                 }
727                 *rrp = rr->next;
728                 rr->next = NULL;
729
730                 last = (inode && inode->i_nlink == 1);
731
732                 /* FIXME check both are dirs or non-dirs, and that a
733                  * target directory is empty */
734                 err = dir_delete_prepare(fs, rr->dir,
735                                          rr->name, rr->nlen,
736                                          &old_doh);
737                 olddb = lafs_inode_dblock(rr->inode, SYNC, MKREF(roll_dir));
738                 if (IS_ERR(olddb))
739                         err = PTR_ERR(olddb);
740                 if (inode) {
741                         /*unlink inode, update name */
742                         err = dir_update_prepare(fs, dir, name, len, &doh)
743                                 ?: err;
744                         inodb = lafs_inode_dblock(inode, SYNC, MKREF(roll_dir));
745                         if (IS_ERR(inodb))
746                                 err = PTR_ERR(inodb);
747                         if (last && !err)
748                                 err = lafs_make_orphan(fs, inodb);
749                         lafs_iolock_block(&inodb->b);
750                         set_bit(B_PinPending, &inodb->b.flags);
751                         lafs_iounlock_block(&inodb->b);
752                 } else
753                         /* create new link */
754                         err = dir_create_prepare(fs, dir, name, len,
755                                                  rr->inode->i_ino,
756                                                  mode_to_dt(rr->inode->i_mode),
757                                                  &doh) ?: err;
758
759                 if (!err) {
760                         lafs_iolock_block(&olddb->b);
761                         set_bit(B_PinPending, &olddb->b.flags);
762                         lafs_iounlock_block(&olddb->b);
763                 }
764
765                 err = err ?: dir_delete_pin(&old_doh);
766                 err = err ?: lafs_pin_dblock(olddb, ReleaseSpace);
767                 if (inode) {
768                         err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
769                         err = err ?: dir_update_pin(&doh);
770                 } else
771                         err = err ?: dir_create_pin(&doh);
772                 if (err < 0) {
773                         dir_delete_abort(&old_doh);
774                         if (inode)
775                                 dir_update_abort(&doh);
776                         else
777                                 dir_create_abort(&doh);
778                         break;
779                 }
780                 dir_delete_commit(&old_doh, fs, rr->dir, rr->name, rr->nlen);
781                 if (S_ISDIR(rr->inode->i_mode)) {
782                         inode_dec_link_count(rr->dir);
783                         if (!inode)
784                                 inode_inc_link_count(dir);
785                 }
786                 if (inode)
787                         dir_update_commit(fs, rr->inode->i_ino,
788                                           mode_to_dt(rr->inode->i_mode),
789                                           &doh);
790                 else
791                         dir_create_commit(&doh, fs, dir, name, len,
792                                           rr->inode->i_ino,
793                                           mode_to_dt(rr->inode->i_mode));
794                 LAFSI(rr->inode)->md.file.parent = dir->i_ino;
795                 if (inode) {
796                         if (S_ISDIR(inode->i_mode))
797                                 inode_dec_link_count(inode);
798                         inode_dec_link_count(inode);
799                         lafs_inode_checkpin(inode);
800                 }
801                 lafs_dirty_inode(rr->inode);
802                 lafs_inode_checkpin(rr->dir);
803                 lafs_inode_checkpin(dir);
804                 clear_bit(B_PinPending, &olddb->b.flags);
805                 if (inode) {
806                         clear_bit(B_PinPending, &inodb->b.flags);
807                         putdref(inodb, MKREF(dir_roll));
808                 }
809                 err = 0;
810                 break;
811         }
812         if (inode && !IS_ERR(inode))
813                 iput(inode);
814         if (inodb && !IS_ERR(inodb))
815                 putdref(inodb, MKREF(roll_dir));
816         if (olddb && !IS_ERR(olddb))
817                 putdref(olddb, MKREF(roll_dir));
818         if (rr) {
819                 iput(rr->dir);
820                 iput(rr->inode);
821                 kfree(rr);
822         }
823         return err;
824 }
825 /*------------------------------------------------------------
826  * Now we have the lowlevel operations in place, we
827  * can implement the VFS interface.
828  */
829 static int
830 lafs_create(struct inode *dir, struct dentry *de, int mode,
831      struct nameidata *nd)
832 {
833 /* Need to allocate an inode and space in the directory */
834         struct fs *fs = fs_from_inode(dir);
835         struct datablock *db;
836         struct inode *ino = lafs_new_inode(fs, LAFSI(dir)->filesys,
837                                            dir, TypeFile, 0, mode, &db);
838         struct dirop_handle doh;
839         struct update_handle uh;
840         int err;
841
842         if (IS_ERR(ino))
843                 return PTR_ERR(ino);
844
845         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
846                                  ino->i_ino, DT_REG, &doh);
847
848         dprintk("ERR = %d\n", err);
849         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
850         dprintk("ERR2 = %d\n", err);
851         if (err)
852                 goto abort;
853
854 retry:
855         dprintk("lc: dirblk = %p\n", doh.dirent_block);
856         lafs_checkpoint_lock(fs);
857
858         err = dir_create_pin(&doh);
859         err = err ?: lafs_cluster_update_pin(&uh);
860         err = err ?: lafs_pin_dblock(db, NewSpace);
861         if (err == -EAGAIN) {
862                 lafs_checkpoint_unlock_wait(fs);
863                 goto retry;
864         }
865         if (err < 0)
866                 goto abort_unlock;
867
868         ino->i_nlink = 1;
869         lafs_add_orphan(fs, db);
870         LAFSI(ino)->md.file.parent = dir->i_ino;
871         lafs_dirty_inode(ino);
872         lafs_inode_checkpin(ino);
873         dir_log_commit(&uh, fs, dir, &de->d_name, ino->i_ino, DIROP_LINK, NULL);
874         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
875                           ino->i_ino, DT_REG);
876         lafs_checkpoint_unlock(fs);
877         d_instantiate(de, ino);
878         clear_bit(B_PinPending, &db->b.flags);
879         putdref(db, MKREF(inode_new));
880         return 0;
881
882 abort_unlock:
883         lafs_checkpoint_unlock(fs);
884 abort:
885         lafs_cluster_update_abort(&uh);
886         dir_create_abort(&doh);
887         iput(ino);
888         clear_bit(B_PinPending, &db->b.flags);
889         putdref(db, MKREF(inode_new));
890         return err;
891 }
892
893 static int
894 lafs_link(struct dentry *from, struct inode *dir, struct dentry *to)
895 {
896         /* Create the new name and increase the link count on the target */
897         struct fs *fs = fs_from_inode(dir);
898         struct dirop_handle doh;
899         struct update_handle uh;
900         struct inode *inode = from->d_inode;
901         struct datablock *inodb;
902         int err;
903
904         if (inode->i_nlink >= LAFS_MAX_LINKS)
905                 return -EMLINK;
906         err = dir_create_prepare(fs, dir, to->d_name.name, to->d_name.len,
907                                  inode->i_ino, mode_to_dt(inode->i_mode),
908                                  &doh);
909         err = dir_log_prepare(&uh, fs, &to->d_name) ?: err;
910
911         inodb = lafs_inode_dblock(dir, SYNC, MKREF(link));
912         if (IS_ERR(inodb))
913                 err = PTR_ERR(inodb);
914         if (err)
915                 goto abort;
916 retry:
917         lafs_checkpoint_lock(fs);
918
919         err = dir_create_pin(&doh);
920         err = err ?: lafs_cluster_update_pin(&uh);
921         err = err ?: lafs_pin_dblock(inodb, NewSpace);
922         if (err == -EAGAIN) {
923                 lafs_checkpoint_unlock_wait(fs);
924                 goto retry;
925         }
926         if (err < 0)
927                 goto abort_unlock;
928
929         inode_inc_link_count(inode);
930         lafs_inode_checkpin(inode);
931         lafs_dirty_dblock(inodb);
932         clear_bit(B_PinPending, &inodb->b.flags);
933         putdref(inodb, MKREF(link));
934
935         dir_log_commit(&uh, fs, dir, &to->d_name, inode->i_ino,
936                        DIROP_LINK, NULL);
937         dir_create_commit(&doh, fs, dir, to->d_name.name, to->d_name.len,
938                           inode->i_ino, mode_to_dt(inode->i_mode));
939         /* Don't log the nlink change - that is implied in the name creation */
940         d_instantiate(to, inode);
941
942         lafs_checkpoint_unlock(fs);
943         return 0;
944 abort_unlock:
945         lafs_checkpoint_unlock(fs);
946         clear_bit(B_PinPending, &inodb->b.flags);
947 abort:
948         if (!IS_ERR(inodb))
949                 putdref(inodb, MKREF(link));
950         dir_create_abort(&doh);
951         lafs_cluster_update_abort(&uh);
952         return err;
953 }
954
955 static int
956 lafs_unlink(struct inode *dir, struct dentry *de)
957 {
958         struct fs *fs = fs_from_inode(dir);
959         struct inode *inode = de->d_inode;
960         int last = (inode->i_nlink == 1);
961         struct dirop_handle doh;
962         struct update_handle uh;
963         struct datablock *inodb;
964         int err;
965
966         dprintk("unlink %s\n", de->d_name.name);
967
968         err = dir_delete_prepare(fs, dir, de->d_name.name, de->d_name.len,
969                                  &doh);
970         BUG_ON(err == -ENOENT);
971         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
972         inodb = lafs_inode_dblock(inode, SYNC, MKREF(inode_update));
973         if (IS_ERR(inodb))
974                 err = PTR_ERR(inodb);
975         if (last && !err)
976                 err = lafs_make_orphan(fs, inodb);
977         if (err)
978                 goto abort;
979         lafs_iolock_block(&inodb->b);
980         set_bit(B_PinPending, &inodb->b.flags);
981         lafs_iounlock_block(&inodb->b);
982 retry:
983         lafs_checkpoint_lock(fs);
984
985         err = dir_delete_pin(&doh);
986         err = err ?: lafs_cluster_update_pin(&uh);
987         err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
988         if (err == -EAGAIN) {
989                 lafs_checkpoint_unlock_wait(fs);
990                 goto retry;
991         }
992         if (err < 0)
993                 goto abort_unlock;
994
995         inode_dec_link_count(inode);
996         dir_log_commit(&uh, fs, dir, &de->d_name, inode->i_ino,
997                        DIROP_UNLINK, NULL);
998         dir_delete_commit(&doh, fs, dir, de->d_name.name, de->d_name.len);
999         lafs_checkpoint_unlock(fs);
1000         lafs_inode_checkpin(inode);
1001         lafs_dirty_dblock(inodb);
1002         clear_bit(B_PinPending, &inodb->b.flags);
1003         putdref(inodb, MKREF(inode_update));
1004         return 0;
1005 abort_unlock:
1006         clear_bit(B_PinPending, &inodb->b.flags);
1007         lafs_checkpoint_unlock(fs);
1008 abort:
1009         if (!IS_ERR(inodb))
1010                 putdref(inodb, MKREF(inode_update));
1011         lafs_cluster_update_abort(&uh);
1012         dir_delete_abort(&doh);
1013         return err;
1014 }
1015
1016 static void dir_flush_orphans(struct fs *fs, struct inode *inode)
1017 {
1018         /*
1019          * Orphans cannot clear while we hold i_mutex, so
1020          * we have to run them ourselves.
1021          */
1022         struct datablock *db;
1023         DEFINE_WAIT(wq);
1024         while ((db = lafs_find_orphan(inode))) {
1025                 int still_orphan;
1026                 prepare_to_wait(&fs->async_complete, &wq,
1027                                 TASK_UNINTERRUPTIBLE);
1028                 getdref(db, MKREF(rmdir_orphan));
1029                 lafs_dir_handle_orphan(db);
1030                 still_orphan = test_bit(B_Orphan, &db->b.flags);
1031                 putdref(db, MKREF(rmdir_orphan));
1032                 if (still_orphan)
1033                         /* still an orphan, need to wait */
1034                         schedule();
1035         }
1036         finish_wait(&fs->async_complete, &wq);
1037 }
1038
1039 static int
1040 lafs_rmdir(struct inode *dir, struct dentry *de)
1041 {
1042         struct fs *fs = fs_from_inode(dir);
1043         struct inode *inode = de->d_inode;
1044         struct dirop_handle doh;
1045         struct update_handle uh;
1046         struct datablock *inodb;
1047         int err;
1048
1049         if (inode->i_nlink > 2)
1050                 return -ENOTEMPTY;
1051         if (inode->i_size) {
1052                 /* Probably not empty, but it could be that we
1053                  * just need to wait for orphans the clear.
1054                  */
1055                 dir_flush_orphans(fs, inode);
1056                 if (inode->i_size)
1057                         return -ENOTEMPTY;
1058         }
1059
1060         dprintk("rmdir %s\n", de->d_name.name);
1061
1062         err = dir_delete_prepare(fs, dir, de->d_name.name, de->d_name.len,
1063                                  &doh);
1064         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1065         inodb = lafs_inode_dblock(inode, SYNC, MKREF(inode_update));
1066         if (IS_ERR(inodb))
1067                 err = PTR_ERR(inodb);
1068         if (!err)
1069                 err = lafs_make_orphan(fs, inodb);
1070         if (err)
1071                 goto abort;
1072         lafs_iolock_block(&inodb->b);
1073         set_bit(B_PinPending, &inodb->b.flags);
1074         lafs_iounlock_block(&inodb->b);
1075 retry:
1076         lafs_checkpoint_lock(fs);
1077
1078         err = dir_delete_pin(&doh);
1079         err = err ?: lafs_cluster_update_pin(&uh);
1080         err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
1081         if (err == -EAGAIN) {
1082                 lafs_checkpoint_unlock_wait(fs);
1083                 goto retry;
1084         }
1085         if (err < 0)
1086                 goto abort_unlock;
1087
1088         inode_dec_link_count(dir);
1089         inode_dec_link_count(inode); /* . */
1090         inode_dec_link_count(inode); /* .. */
1091         dir_log_commit(&uh, fs, dir, &de->d_name, inode->i_ino,
1092                        DIROP_UNLINK, NULL);
1093         dir_delete_commit(&doh, fs, dir, de->d_name.name, de->d_name.len);
1094         lafs_dirty_dblock(inodb);
1095         clear_bit(B_PinPending, &inodb->b.flags);
1096         putdref(inodb, MKREF(inode_update));
1097         lafs_inode_checkpin(inode);
1098         lafs_inode_checkpin(dir);
1099         lafs_checkpoint_unlock(fs);
1100         return 0;
1101 abort_unlock:
1102         lafs_checkpoint_unlock(fs);
1103         clear_bit(B_PinPending, &inodb->b.flags);
1104 abort:
1105         if (!IS_ERR(inodb))
1106                 putdref(inodb, MKREF(inode_update));
1107         lafs_cluster_update_abort(&uh);
1108         dir_delete_abort(&doh);
1109         return err;
1110 }
1111
1112 static int
1113 lafs_symlink(struct inode *dir, struct dentry *de,
1114              const char *symlink)
1115 {
1116         int l;
1117         struct inode *ino;
1118         struct fs *fs = fs_from_inode(dir);
1119         struct datablock *b, *inodb;
1120         struct dirop_handle doh;
1121         struct update_handle uh;
1122         char *buf;
1123         int err;
1124
1125         l = strlen(symlink);
1126         if (l > fs->blocksize-1)
1127                 return -ENAMETOOLONG;
1128
1129         ino = lafs_new_inode(fs, LAFSI(dir)->filesys, dir,
1130                              TypeSymlink, 0, 0666, &inodb);
1131         if (IS_ERR(ino))
1132                 return PTR_ERR(ino);
1133         b = lafs_get_block(ino, 0, NULL, GFP_KERNEL, MKREF(symlink));
1134         if (!b) {
1135                 putdref(inodb, MKREF(inode_new));
1136                 iput(ino);
1137                 return -ENOMEM;
1138         }
1139         set_bit(B_PinPending, &b->b.flags);
1140
1141         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
1142                                  ino->i_ino, DT_LNK, &doh);
1143         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1144         if (err)
1145                 goto abort;
1146 retry:
1147         lafs_checkpoint_lock(fs);
1148
1149         err = dir_create_pin(&doh);
1150         err = err ?: lafs_pin_dblock(b, NewSpace);
1151         err = err ?: lafs_cluster_update_pin(&uh);
1152         if (err == -EAGAIN) {
1153                 lafs_checkpoint_unlock_wait(fs);
1154                 goto retry;
1155         }
1156         if (err < 0)
1157                 goto abort_unlock;
1158
1159         ino->i_nlink = 1;
1160         LAFSI(ino)->md.file.parent = dir->i_ino;
1161         lafs_add_orphan(fs, inodb);
1162
1163         lafs_iolock_block(&b->b);
1164         buf = map_dblock(b);
1165         memcpy(buf, symlink, l);
1166         buf[l] = 0;
1167         unmap_dblock(b, buf);
1168         set_bit(B_Valid, &b->b.flags);
1169         lafs_dirty_dblock(b);
1170         clear_bit(B_PinPending, &b->b.flags);
1171         lafs_cluster_allocate(&b->b, 0); /* Content will go in the next cluster - almost like
1172                                           * an update */
1173         putdref(b, MKREF(symlink));
1174         i_size_write(ino, l);
1175         lafs_dirty_inode(ino);
1176         lafs_inode_checkpin(ino);
1177
1178         dir_log_commit(&uh, fs, dir, &de->d_name, ino->i_ino, DIROP_LINK, NULL);
1179         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
1180                           ino->i_ino, DT_LNK);
1181         lafs_checkpoint_unlock(fs);
1182         d_instantiate(de, ino);
1183         putdref(inodb, MKREF(inode_new));
1184         return 0;
1185 abort_unlock:
1186         lafs_checkpoint_unlock(fs);
1187 abort:
1188         putdref(inodb, MKREF(inode_new));
1189         clear_bit(B_PinPending, &b->b.flags);
1190         putdref(b, MKREF(symlink));
1191         dir_create_abort(&doh);
1192         lafs_cluster_update_abort(&uh);
1193         iput(ino);
1194         return err;
1195 }
1196
1197 static int
1198 lafs_mkdir(struct inode *dir, struct dentry *de, int mode)
1199 {
1200         struct inode *ino;
1201         struct lafs_inode *lai;
1202         struct fs *fs = fs_from_inode(dir);
1203         int err;
1204         struct dirop_handle doh;
1205         struct update_handle uh;
1206         struct datablock *inodb;
1207
1208         if (dir->i_nlink >= LAFS_MAX_LINKS)
1209                 return -EMLINK;
1210
1211         ino = lafs_new_inode(fs, LAFSI(dir)->filesys, dir,
1212                              TypeDir, 0, mode, &inodb);
1213         if (IS_ERR(ino))
1214                 return PTR_ERR(ino);
1215
1216         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
1217                                  ino->i_ino, DT_DIR, &doh);
1218         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1219         if (err)
1220                 goto abort;
1221 retry:
1222         lafs_checkpoint_lock(fs);
1223
1224         err = dir_create_pin(&doh);
1225         err = err ?: lafs_pin_dblock(inodb, NewSpace);
1226         err = err ?: lafs_cluster_update_pin(&uh);
1227         if (err == -EAGAIN) {
1228                 lafs_checkpoint_unlock_wait(fs);
1229                 goto retry;
1230         }
1231         if (err < 0)
1232                 goto abort_unlock;
1233
1234         lai = LAFSI(ino);
1235         lai->md.file.parent = dir->i_ino;
1236         inode_inc_link_count(dir);
1237         ino->i_nlink = 2; /* From parent, and from '.' */
1238         lafs_dirty_inode(ino);
1239         lafs_inode_checkpin(dir);
1240         lafs_inode_checkpin(ino);
1241         lafs_add_orphan(fs, inodb);
1242         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
1243                           ino->i_ino, DT_DIR);
1244         d_instantiate(de, ino);
1245         clear_bit(B_PinPending, &inodb->b.flags);
1246         putdref(inodb, MKREF(inode_new));
1247         lafs_checkpoint_unlock(fs);
1248         return 0;
1249 abort_unlock:
1250         lafs_checkpoint_unlock(fs);
1251 abort:
1252         dir_create_abort(&doh);
1253         lafs_cluster_update_abort(&uh);
1254         iput(ino);
1255         clear_bit(B_PinPending, &inodb->b.flags);
1256         putdref(inodb, MKREF(inode_new));
1257         return err;
1258 }
1259
1260 static int
1261 lafs_mknod(struct inode *dir, struct dentry *de, int mode,
1262            dev_t rdev)
1263 {
1264         struct inode *ino;
1265         struct fs *fs = fs_from_inode(dir);
1266         int err;
1267         struct dirop_handle doh;
1268         struct update_handle uh;
1269         struct datablock *inodb;
1270         int type;
1271
1272         if (!new_valid_dev(rdev))
1273                 return -EINVAL;
1274
1275         type = TypeSpecial;
1276         switch (mode & S_IFMT) {
1277         case S_IFREG:
1278                 type = TypeFile;
1279                 break;
1280         case S_IFCHR:
1281         case S_IFBLK:
1282         case S_IFIFO:
1283         case S_IFSOCK:
1284                 break;
1285         default:
1286                 return -EINVAL;
1287         }
1288         ino = lafs_new_inode(fs, LAFSI(dir)->filesys, dir,
1289                              type, 0, mode, &inodb);
1290         if (IS_ERR(ino))
1291                 return PTR_ERR(ino);
1292         init_special_inode(ino, ino->i_mode, rdev);
1293
1294         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
1295                                  ino->i_ino, type, &doh);
1296         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1297         if (err)
1298                 goto abort;
1299 retry:
1300         lafs_checkpoint_lock(fs);
1301
1302         err = dir_create_pin(&doh);
1303         err = err ?: lafs_pin_dblock(inodb, NewSpace);
1304         err = err ?: lafs_cluster_update_pin(&uh);
1305         if (err == -EAGAIN) {
1306                 lafs_checkpoint_unlock_wait(fs);
1307                 goto retry;
1308         }
1309         if (err < 0)
1310                 goto abort_unlock;
1311
1312         LAFSI(ino)->md.file.parent = dir->i_ino;
1313         ino->i_nlink = 1;
1314         lafs_add_orphan(fs, inodb);
1315         lafs_dirty_inode(ino);
1316         lafs_inode_checkpin(ino);
1317
1318         dir_log_commit(&uh, fs, dir, &de->d_name, ino->i_ino, DIROP_LINK, NULL);
1319         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
1320                           ino->i_ino, type);
1321         lafs_checkpoint_unlock(fs);
1322         d_instantiate(de, ino);
1323         clear_bit(B_PinPending, &inodb->b.flags);
1324         putdref(inodb, MKREF(inode_new));
1325         return 0;
1326 abort_unlock:
1327         lafs_checkpoint_unlock(fs);
1328 abort:
1329         dir_create_abort(&doh);
1330         lafs_cluster_update_abort(&uh);
1331         iput(ino);
1332         clear_bit(B_PinPending, &inodb->b.flags);
1333         putdref(inodb, MKREF(inode_new));
1334         return err;
1335 }
1336
1337 static int
1338 lafs_rename(struct inode *old_dir, struct dentry *old_dentry,
1339             struct inode *new_dir, struct dentry *new_dentry)
1340 {
1341         /* VFS has checked that this is file->file or dir->dir or
1342          * something->nothing.
1343          * We just need to check that if the target is a directory,
1344          * it is empty, then perform the rename
1345          */
1346         struct fs *fs = fs_from_inode(old_dir);
1347
1348         struct inode *old_inode = old_dentry->d_inode;
1349         struct inode *new_inode = new_dentry->d_inode;
1350
1351         struct datablock *olddb, *newdb = newdb;
1352
1353         struct dirop_handle old_doh, new_doh;
1354         struct update_handle old_uh, new_uh;
1355         int last = (new_inode && new_inode->i_nlink == 1);
1356         u32 renhandle;
1357         int err;
1358
1359         if (S_ISDIR(old_inode->i_mode)) {
1360                 if (new_inode) {
1361                         if (new_inode->i_size) {
1362                                 dir_flush_orphans(fs, new_inode);
1363                                 if (new_inode->i_size)
1364                                         return -ENOTEMPTY;
1365                         }
1366                 } else if (new_dir != old_dir) {
1367                         /* New dir is getting a new link */
1368                         if (new_dir->i_nlink >= LAFS_MAX_LINKS)
1369                                 return -EMLINK;
1370                 }
1371         }
1372         dprintk("rename %s %s\n", old_dentry->d_name.name,
1373                 new_dentry->d_name.name);
1374
1375         /* old entry gets deleted, new entry gets created or updated. */
1376         err = dir_delete_prepare(fs, old_dir,
1377                                  old_dentry->d_name.name,
1378                                  old_dentry->d_name.len,
1379                                  &old_doh);
1380         err = dir_log_prepare(&old_uh, fs, &old_dentry->d_name) ?: err;
1381         err = dir_log_prepare(&new_uh, fs, &new_dentry->d_name) ?: err;
1382         olddb = lafs_inode_dblock(old_inode, SYNC, MKREF(inode_update));
1383         if (IS_ERR(olddb))
1384                 err = PTR_ERR(olddb);
1385
1386         if (new_inode) {
1387                 /* unlink object, update name */
1388                 err = dir_update_prepare(fs, new_dir,
1389                                          new_dentry->d_name.name,
1390                                          new_dentry->d_name.len,
1391                                          &new_doh) ?: err;
1392                 newdb = lafs_inode_dblock(new_inode, SYNC, MKREF(inode_update));
1393                 if (IS_ERR(newdb))
1394                         err = PTR_ERR(newdb);
1395                 if (last && !err)
1396                         err = lafs_make_orphan(fs, newdb);
1397                 lafs_iolock_block(&newdb->b);
1398                 set_bit(B_PinPending, &newdb->b.flags);
1399                 lafs_iounlock_block(&newdb->b);
1400         } else
1401                 /* create new link */
1402                 err = dir_create_prepare(fs, new_dir,
1403                                          new_dentry->d_name.name,
1404                                          new_dentry->d_name.len,
1405                                          old_inode->i_ino,
1406                                          mode_to_dt(old_inode->i_mode),
1407                                          &new_doh) ?: err;
1408
1409         if (err)
1410                 goto abort;
1411
1412         lafs_iolock_block(&olddb->b);
1413         set_bit(B_PinPending, &olddb->b.flags);
1414         lafs_iounlock_block(&olddb->b);
1415 retry:
1416         lafs_checkpoint_lock(fs);
1417
1418         err = dir_delete_pin(&old_doh);
1419         err = err ?: lafs_cluster_update_pin(&old_uh);
1420         err = err ?: lafs_cluster_update_pin(&new_uh);
1421         err = err ?: lafs_pin_dblock(olddb, ReleaseSpace);
1422         if (new_inode) {
1423                 err = err ?: lafs_pin_dblock(newdb, NewSpace);
1424                 err = err ?: dir_update_pin(&new_doh);
1425         } else
1426                 err = err ?: dir_create_pin(&new_doh);
1427
1428         if (err == -EAGAIN) {
1429                 lafs_checkpoint_unlock_wait(fs);
1430                 goto retry;
1431         }
1432         if (err < 0)
1433                 goto abort_unlock;
1434
1435         /* OK, let's do the deed */
1436         dir_delete_commit(&old_doh, fs, old_dir, old_dentry->d_name.name,
1437                           old_dentry->d_name.len);
1438         dir_log_commit(&old_uh, fs, old_dir, &old_dentry->d_name,
1439                        old_inode->i_ino, DIROP_REN_SOURCE, &renhandle);
1440         dir_log_commit(&new_uh, fs, new_dir, &new_dentry->d_name,
1441                        new_inode ? new_inode->i_ino : 0,
1442                        DIROP_REN_TARGET,
1443                        &renhandle);
1444         if (S_ISDIR(old_inode->i_mode)) {
1445                 inode_dec_link_count(old_dir);
1446                 if (!new_inode)
1447                         inode_inc_link_count(new_dir);
1448         }
1449         if (new_inode)
1450                 dir_update_commit(fs, old_inode->i_ino,
1451                                   mode_to_dt(old_inode->i_mode),
1452                                   &new_doh);
1453         else
1454                 dir_create_commit(&new_doh, fs, new_dir,
1455                                   new_dentry->d_name.name,
1456                                   new_dentry->d_name.len,
1457                                   old_inode->i_ino,
1458                                   mode_to_dt(old_inode->i_mode));
1459         LAFSI(old_inode)->md.file.parent = new_dir->i_ino;
1460         if (new_inode) {
1461                 if (S_ISDIR(new_inode->i_mode))
1462                         inode_dec_link_count(new_inode);
1463                 inode_dec_link_count(new_inode);
1464                 lafs_inode_checkpin(new_inode);
1465         }
1466         lafs_dirty_inode(old_inode);
1467         lafs_inode_checkpin(new_dir);
1468         lafs_inode_checkpin(old_dir);
1469         clear_bit(B_PinPending, &olddb->b.flags);
1470         putdref(olddb, MKREF(inode_update));
1471         if (new_inode) {
1472                 clear_bit(B_PinPending, &newdb->b.flags);
1473                 putdref(newdb, MKREF(inode_new));
1474         }
1475
1476         lafs_checkpoint_unlock(fs);
1477         return 0;
1478
1479 abort_unlock:
1480         lafs_checkpoint_unlock(fs);
1481         clear_bit(B_PinPending, &olddb->b.flags);
1482         if (new_inode)
1483                 clear_bit(B_PinPending, &newdb->b.flags);
1484 abort:
1485         dir_delete_abort(&old_doh);
1486         lafs_cluster_update_abort(&old_uh);
1487         lafs_cluster_update_abort(&new_uh);
1488         if (!IS_ERR(olddb))
1489                 putdref(olddb, MKREF(inode_update));
1490         if (new_inode) {
1491                 dir_update_abort(&new_doh);
1492                 if (!IS_ERR(newdb))
1493                         putdref(newdb, MKREF(inode_new));
1494         } else
1495                 dir_create_abort(&new_doh);
1496         return err;
1497 }
1498
1499 /*--------------------------------------------------------------------
1500  * Directory Orphan handling.
1501  *
1502  * blocks in a directory file that are 'orphans' have recently had a deletion
1503  * and may need:
1504  *   - to be punched as a hole if empty
1505  *   - to have 'deleted' entries purged if they are freeable
1506  *   - to schedule next block for orphan handling if that might be appropriate.
1507  *
1508  *
1509  * Specifically:
1510  *  Lock the directory.
1511  *   If last possible entry (addr-1) is deleted,
1512  *    look for next entry.
1513  *    If it doesn't exist, remove last entry and preceding deleted
1514  *     entries, just like with delete.
1515  *   If first block is 'deleted' and next is removed,
1516  *     remove that deleted entry.
1517  *     look for previous entry.
1518  *     if it is deleted, schedule orphan handling.
1519  *   If ->root is 0, punch a hole
1520  */
1521
1522 int lafs_dir_handle_orphan(struct datablock *b)
1523 {
1524         struct inode *dir = b->b.inode;
1525         struct fs *fs = fs_from_inode(dir);
1526         int bits = dir->i_blkbits-8;
1527         u32 seed = LAFSI(dir)->md.file.seed;
1528         u32 hash;
1529         char *buf, *buf2;
1530         struct datablock *b2 = NULL;
1531         u8 piece, firstpiece;
1532         struct dir_ent de;
1533         int err = 0;
1534
1535         dprintk("HANDLE ORPHAN h=%x %s\n", (unsigned)hash, strblk(&b->b));
1536
1537         if (!lafs_iolock_written_async(&b->b))
1538                 return -EAGAIN;
1539         set_bit(B_PinPending, &b->b.flags);
1540         lafs_iounlock_block(&b->b);
1541
1542         lafs_checkpoint_lock(fs);
1543
1544         if (!test_bit(B_Valid, &b->b.flags)) {
1545                 /* probably have already erased this block,
1546                  * but the orphan_release failed due to
1547                  * space being tight.
1548                  * just try again
1549                  */
1550                 lafs_orphan_release(fs, b);
1551                 err = 0;
1552                 goto abort;
1553         }
1554
1555         /* First test:  Does a chain of deleted entries extend beyond
1556          * the end of this block.  i.e. is the last entry deleted.
1557          * If so, look at the next block and see if the chain is still
1558          * anchored, or if it can all be released.
1559          */
1560         buf = map_dblock(b);
1561         hash = (b->b.fileaddr-1) & MaxDirHash;
1562         if (lafs_dir_find(buf, bits, seed, hash, &piece) &&
1563             lafs_dir_extract(buf, bits, &de, piece, NULL)->target == 0) {
1564                 loff_t bnum;
1565                 unmap_dblock(b, buf);
1566                 bnum = b->b.fileaddr + 1;
1567                 if (lafs_find_next(dir, &bnum) == 0)
1568                         /* FIXME what if it returns an error */
1569                         bnum = 0;
1570
1571                 b2 = lafs_get_block(dir, bnum, NULL, GFP_KERNEL,
1572                                     MKREF(dir_orphan));
1573                 err = -ENOMEM;
1574                 if (!b2)
1575                         goto abort;
1576                 err = lafs_read_block_async(b2);
1577                 if (err)
1578                         goto abort;
1579
1580                 buf2 = map_dblock(b2);
1581                 if (lafs_dir_find(buf2, bits, seed, hash+1, &piece) == 0) {
1582                         u8 firstpiece;
1583                         /* We can remove that last entry, and maybe others */
1584                         unmap_dblock(b2, buf2);
1585
1586                         err = lafs_pin_dblock(b, ReleaseSpace);
1587                         if (err)
1588                                 goto abort;
1589                         buf = map_dblock(b);
1590                         lafs_dir_find(buf, bits, seed, 0, &firstpiece);
1591                         do {
1592                                 if (piece == firstpiece)
1593                                         break;
1594                                 lafs_dir_del_ent(buf, bits, seed, hash);
1595                                 hash--;
1596                         } while (lafs_dir_find(buf, bits, seed, hash, &piece) &&
1597                                  lafs_dir_extract(buf, bits, &de, piece,
1598                                                   NULL)->target == 0);
1599                         unmap_dblock(b, buf);
1600                         lafs_dirty_dblock(b);
1601                 } else
1602                         unmap_dblock(b2, buf2);
1603                 buf = map_dblock(b);
1604                 putdref(b2, MKREF(dir_orphan));
1605                 b2 = NULL;
1606         }
1607
1608         /* Second test:  if we have an unanchored chain at the start
1609          * of the block, then schedule orphan handling for previous block,
1610          * and remove the unanchor.
1611          */
1612         lafs_dir_find(buf, bits, seed, 0, &firstpiece);
1613         hash = seed;
1614         if (firstpiece &&
1615             lafs_dir_extract(buf, bits, &de, firstpiece, &hash)->target == 0 &&
1616             lafs_dir_find(buf, bits, seed, hash+1, &piece) == 0) {
1617                 unmap_dblock(b, buf);
1618                 b2 = lafs_get_block(dir, hash, NULL, GFP_KERNEL,
1619                                     MKREF(dir_orphan));
1620                 err = -ENOMEM;
1621                 if (!b2)
1622                         goto abort;
1623                 err = lafs_read_block_async(b2);
1624                 if (err)
1625                         goto abort;
1626
1627                 buf2 = map_dblock(b2);
1628                 if (lafs_dir_find(buf2, bits, seed, (hash-1) & MaxDirHash,
1629                                   &piece) &&
1630                     lafs_dir_extract(buf2, bits, &de, piece, NULL)->target == 0)
1631                         err = lafs_make_orphan_nb(fs, b2);
1632                 unmap_dblock(b2, buf2);
1633                 putdref(b2, MKREF(dir_orphan));
1634                 b2 = NULL;
1635                 if (err)
1636                         goto abort;
1637                 err = lafs_pin_dblock(b, ReleaseSpace);
1638                 if (err)
1639                         goto abort;
1640                 buf = map_dblock(b);
1641                 lafs_dir_del_ent(buf, bits, seed, hash);
1642                 lafs_dirty_dblock(b);
1643         }
1644
1645         if (lafs_dir_empty(buf)) {
1646                 loff_t bnum;
1647                 unmap_dblock(b, buf);
1648
1649                 err = lafs_pin_dblock(b, ReleaseSpace);
1650                 if (err)
1651                         goto abort;
1652
1653                 bnum = 1;
1654                 err = lafs_find_next(dir, &bnum);
1655                 if (err < 0)
1656                         goto abort;
1657                 if (err == 0) {
1658                         if (b->b.fileaddr == 0)
1659                                 i_size_write(dir, 0);
1660                         else {
1661                                 b2 = lafs_get_block(dir, 0, NULL, GFP_KERNEL,
1662                                                     MKREF(dir_orphan));
1663                                 err = -ENOMEM;
1664                                 if (!b2)
1665                                         goto abort;
1666                                 err = lafs_read_block_async(b2);
1667                                 if (err)
1668                                         goto abort;
1669                                 buf2 = map_dblock(b2);
1670                                 i_size_write(dir,
1671                                              lafs_dir_blk_size(buf2, bits));
1672                                 unmap_dblock(b2, buf2);
1673                                 putdref(b2, MKREF(dir_orphan));
1674                                 b2 = NULL;
1675                         }
1676                         lafs_dirty_inode(dir);
1677                 }
1678                 lafs_erase_dblock(b);
1679         } else
1680                 unmap_dblock(b, buf);
1681
1682         lafs_orphan_release(fs, b);
1683         err = 0;
1684
1685 abort:
1686         clear_bit(B_PinPending, &b->b.flags);
1687         putdref(b2, MKREF(dir_orphan));
1688         lafs_checkpoint_unlock(fs);
1689         return err;
1690 }
1691
1692 /*--------------------------------------------------------------------
1693  * Finally the read-only operations
1694  */
1695 static int
1696 lafs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1697 {
1698         struct dentry *dentry = filp->f_dentry;
1699         struct lafs_inode *lai = LAFSI(dentry->d_inode);
1700         ino_t ino;
1701         loff_t i = filp->f_pos;
1702         loff_t bnum;
1703         u32 hash;
1704         int err = 0;
1705         int over;
1706         int bits = dentry->d_inode->i_blkbits - 8;
1707         u32 seed = lai->md.file.seed;
1708
1709         switch (i) {
1710         case 0:
1711                 ino = dentry->d_inode->i_ino;
1712                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1713                         break;
1714                 filp->f_pos++;
1715                 i++;
1716                 /* fallthrough */
1717         case 1:
1718                 ino = lai->md.file.parent;
1719                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1720                         break;
1721                 filp->f_pos++;
1722                 i++;
1723                 /* fallthrough */
1724         default:
1725                 hash = i - 2;
1726                 err = 0;
1727                 over = 0;
1728                 do {
1729                         struct datablock *b;
1730                         char *buf;
1731
1732                         bnum = hash+1;
1733                         switch (lafs_find_next(dentry->d_inode, &bnum)) {
1734                         case 1:
1735                                 break;
1736                         case 0:
1737                                 bnum = 0;
1738                                 break;
1739                         default:
1740                                 return -EIO;
1741                         }
1742                         b = lafs_get_block(dentry->d_inode, bnum, NULL,
1743                                            GFP_KERNEL, MKREF(readdir));
1744                         if (!b) {
1745                                 err = -ENOMEM;
1746                                 break;
1747                         }
1748                         err = lafs_read_block(b);
1749                         if (err)
1750                                 break;
1751                         /* buf = map_dblock(b); */
1752                         buf = kmap(b->page);
1753                         buf += dblock_offset(b);
1754                         while (1) {
1755                                 u8 piece;
1756                                 struct dir_ent de;
1757
1758                                 lafs_dir_find(buf, bits, seed, hash, &piece);
1759                                 if (!piece)
1760                                         break;
1761                                 hash = seed;
1762                                 lafs_dir_extract(buf, bits, &de,
1763                                                  piece, &hash);
1764
1765                                 if (de.target == 0) {
1766                                         hash++;
1767                                         filp->f_pos = hash+2;
1768                                         continue;
1769                                 }
1770                                 /* This is a good name to return */
1771                                 over = filldir(dirent, de.name, de.nlen,
1772                                                hash+2, de.target, de.type);
1773                                 hash++;
1774                                 if (!over)
1775                                         filp->f_pos = hash+2;
1776                                 else
1777                                         break;
1778                         }
1779                         /* unmap_dblock(b, buf); */
1780                         kunmap(b->page);
1781                         putdref(b, MKREF(readdir));
1782                         hash = bnum;
1783                 } while (bnum && !over);
1784                 break;
1785         }
1786         return err;
1787 }
1788
1789 static struct dentry *
1790 lafs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1791 {
1792         /* Simple lookup that maps inode number to that inode. */
1793         u32 inum = 0;
1794         struct inode *ino;
1795         int err = dir_lookup(dir, dentry->d_name.name, dentry->d_name.len,
1796                              &inum);
1797
1798         if (err == -ENOENT) {
1799                 d_add(dentry, NULL);
1800                 return NULL;
1801         }
1802         /* FIXME range check inum */
1803
1804         if (err < 0)
1805                 ERR_PTR(err);
1806         ino = lafs_iget(LAFSI(dir)->filesys, inum, SYNC);
1807
1808         if (IS_ERR(ino))
1809                 return ERR_PTR(PTR_ERR(ino));
1810         return d_splice_alias(ino, dentry);
1811 }
1812
1813 static int lafs_getattr_dir(struct vfsmount *mnt, struct dentry *dentry,
1814                             struct kstat *stat)
1815 {
1816         lafs_fillattr(dentry->d_inode, stat);
1817         /* hide 'holes' in directories by making the size match
1818          * the number of allocated blocks.
1819          */
1820         if (stat->size > dentry->d_inode->i_sb->s_blocksize)
1821                 stat->size = (dentry->d_inode->i_sb->s_blocksize *
1822                              (LAFSI(dentry->d_inode)->cblocks +
1823                               LAFSI(dentry->d_inode)->pblocks +
1824                               LAFSI(dentry->d_inode)->ablocks));
1825         return 0;
1826 }
1827
1828 const struct file_operations lafs_dir_file_operations = {
1829         .llseek         = generic_file_llseek,  /* Just set 'pos' */
1830         .read           = generic_read_dir,     /* return error */
1831         .readdir        = lafs_readdir,
1832 #if 0
1833         .fsync          = lafs_sync_cluster,
1834 #endif
1835 };
1836
1837 const struct inode_operations lafs_dir_ino_operations = {
1838         .lookup         = lafs_lookup,
1839         .create         = lafs_create,
1840         .link           = lafs_link,
1841         .unlink         = lafs_unlink,
1842         .symlink        = lafs_symlink,
1843         .mkdir          = lafs_mkdir,
1844         .rmdir          = lafs_rmdir,
1845         .rename         = lafs_rename,
1846         .mknod          = lafs_mknod,
1847         .setattr        = lafs_setattr,
1848         .getattr        = lafs_getattr_dir,
1849 };