]> git.neil.brown.name Git - LaFS.git/blob - dir.c
README update
[LaFS.git] / dir.c
1
2 /*
3  * fs/lafs/dir.c
4  * Copyright (C) 2005-2009
5  * Neil Brown <neilb@suse.de>
6  * Released under the GPL, version 2
7  *
8  * Directory operations
9  */
10
11 #include        "lafs.h"
12 #include        <linux/slab.h>
13
/* Shift that brings the S_IFMT field of a mode down to a small index. */
#define S_SHIFT 12

/*
 * Lookup table from the file-type bits of a mode ((mode & S_IFMT) >> S_SHIFT)
 * to the DT_* value recorded for that type.  Slots without an explicit
 * initialiser are zero.
 * The table is never written, so it is const.
 */
static const unsigned char file_type[1 + (S_IFMT >> S_SHIFT)] = {
	[S_IFREG >> S_SHIFT]	= DT_REG,
	[S_IFDIR >> S_SHIFT]	= DT_DIR,
	[S_IFCHR >> S_SHIFT]	= DT_CHR,
	[S_IFBLK >> S_SHIFT]	= DT_BLK,
	[S_IFIFO >> S_SHIFT]	= DT_FIFO,
	[S_IFSOCK >> S_SHIFT]	= DT_SOCK,
	[S_IFLNK >> S_SHIFT]	= DT_LNK,
};
25
26 static inline int mode_to_dt(int mode)
27 {
28         return file_type[(mode & S_IFMT) >> S_SHIFT];
29 }
30
31 /*****************************************************************************
32  * Directory lookup
33  * Most of the work is done in dir_lookup_blk which returns the block
34  * that should hold the entry.
35  * dir_lookup takes that result and finds an inode number if possible.
36  */
37 static struct datablock *
38 dir_lookup_blk(struct inode *dir, const char *name, int nlen,
39                u8 *pp, int forcreate, u32 *hashp, REFARG)
40 {
41         struct lafs_inode *lai = LAFSI(dir);
42         u32 seed = lai->md.file.seed;
43         struct datablock *b, *cb = NULL;
44         loff_t bn;
45         u32 hash;
46         int err;
47         int bits = dir->i_blkbits-8;
48         if (nlen == 0)
49                 nlen = strlen(name);
50
51         hash = lafs_hash_name(seed, nlen, name);
52         dprintk("name= %s hash=%lu\n", name, (unsigned long)bn);
53
54         while (1) {
55                 char *buf;
56                 bn = hash+1;
57                 dprintk("bn=%lu\n", (unsigned long)bn);
58                 if (lafs_find_next(dir, &bn) == 0)
59                         bn = 0;
60                 dprintk("now=%lu\n", (unsigned long)bn);
61
62                 err = -ENOMEM;
63                 b = lafs_get_block(dir, bn, NULL, GFP_KERNEL, REF);
64                 if (!b)
65                         break;
66                 err = lafs_read_block(b);
67                 if (err)
68                         break;
69                 buf = map_dblock(b);
70                 if (lafs_trace) {
71                         void lafs_dir_print(char *buf, int psz);
72                         lafs_dir_print(buf, bits);
73                 }
74                 for ( ; hash < (bn ? bn : MaxDirHash) ; hash++) {
75                         u8 piece;
76                         struct dir_ent de;
77                         int rv = lafs_dir_find(buf, bits, seed, hash, &piece);
78
79                         if (rv == 0) {
80                                 unmap_dblock(b, buf);
81                                 if (forcreate) {
82                                         if (cb) {
83                                                 putdref(b, REF);
84                                                 b = cb;
85                                         } else
86                                                 *hashp = hash;
87                                         return b;
88                                 }
89                                 putdref(b, REF);
90                                 return ERR_PTR(-ENOENT);
91                         }
92                         lafs_dir_extract(buf, bits, &de, piece, NULL);
93                         if (!de.target) {
94                                 if (forcreate && !cb) {
95                                         cb = getdref(b, REF);
96                                         *hashp = hash;
97                                 }
98                                 continue;
99                         }
100                         if (de.nlen != nlen)
101                                 continue;
102                         if (strncmp(de.name, name, de.nlen) != 0)
103                                 continue;
104                         unmap_dblock(b, buf);
105                         if (forcreate) {
106                                 putdref(b, REF);
107                                 if (cb)
108                                         putdref(cb, REF);
109                                 return ERR_PTR(-EEXIST);
110                         }
111                         *pp = piece;
112                         if (hashp)
113                                 *hashp = hash;
114                         return b;
115                 }
116                 unmap_dblock(b, buf);
117                 putdref(b, REF);
118         }
119
120         putdref(b, REF);
121         putdref(cb, REF);
122         return ERR_PTR(err);
123 }
124
125 static int
126 dir_lookup(struct inode *dir, const char *name, int nlen,
127            u32 *inum)
128 {
129         /*
130          * return
131          *    -ve on error
132          *     0 on not found
133          *  type on found
134          */
135         struct datablock *b;
136         char *buf;
137         struct dir_ent de;
138         int bits = dir->i_blkbits-8;
139         u8 piece;
140
141         b = dir_lookup_blk(dir, name, nlen, &piece, 0, NULL, MKREF(dir_lookup));
142         if (IS_ERR(b))
143                 return PTR_ERR(b);
144         buf = map_dblock(b);
145         *inum = lafs_dir_extract(buf, bits, &de, piece, NULL)->target;
146         unmap_dblock(b, buf);
147         putdref(b, MKREF(dir_lookup));
148         return 1;
149 }
150
/*****************************************************************************
 *
 * Directory modification routines.
 * We have sets of prepare, pin, commit, abort.
 * 'prepare' happens outside of a phase-lock and can allocate
 * and reserve blocks, and is free to fail.  Everything it does
 * should be revertible - or atomically committed.
 * 'pin' happens inside a phase-lock and should pin any blocks
 * that might need to be dirtied.  It only fails if the pin fails.
 * 'commit' is called inside the phase-lock if prepare and pin succeed.
 * It must clean up anything that was set up by 'prepare'.
 * 'abort' is called if anything fails.  It too should clean up anything
 * that prepare sets up.
 * If 'prepare' is called, then either 'commit' or 'abort' will be called
 * so failure in prepare can leave cleanup to 'abort'.
 *
 * The whole process uses a dirop_handle to store various aspects
 * of state that might need to be unwound or committed.
 * A compound operation such as rename may include several
 * simple operations such as delete + create.  In that case there
 * will be a separate dirop_handle for each simple operation.
 * There will only be one create.
 */
174 struct dirop_handle {
175         struct datablock *dirent_block;
176         struct datablock *new;
177         char    *temp;
178         u32     hash;
179         u8      index;
180         int     chainoffset;
181 };
182
183 /*............................................................................
184  * Creating an entry in a directory.
185  * This is split into pre_create and commit_create
186  * We already know that the name doesn't exist so a lookup will fail,
187  * but will find the right place in the tree.
188  * pre_create allocates blocks as needed and stores info in the dirop_handle.
189  * commit_create finalises the create and cannot fail.
190  */
191
192 static int dir_create_prepare(struct fs *fs, struct inode *dir,
193                               const char *name, int nlen,
194                               u32 inum, int type,
195                               struct dirop_handle *doh)
196 {
197         /*
198          * We need one or 2 blocks
199          * - a dirent block with space to receive the new entry.
200          * - A free block into which the block index may split
201          * These will be pinned and allocated credits before we commit.
202          *
203          * For a single-block which still has room,
204          * we only need the first of those.
205          *
206          */
207         int blocksize = fs->blocksize;
208         struct datablock *dirblk;
209         char *buf, *n1, *n2;
210         int bits = dir->i_blkbits-8;
211         u8 piece;
212         struct lafs_inode *lai = LAFSI(dir);
213         u32 seed = lai->md.file.seed;
214         struct dirheader *dh;
215         int chainoffset;
216         u32 hash, newhash;
217         int rv;
218
219         doh->temp = NULL;
220         doh->new = NULL;
221
222         doh->dirent_block =
223                 dirblk = dir_lookup_blk(dir, name, nlen, &piece, 1, &hash,
224                                         MKREF(dir_blk));
225
226         if (IS_ERR(dirblk))
227                 return PTR_ERR(dirblk);
228
229         lafs_iolock_written(&dirblk->b);
230         set_bit(B_PinPending, &dirblk->b.flags);
231         /* i_mutex protect us now, so don't need to maintain the lock */
232         lafs_iounlock_block(&dirblk->b);
233
234         chainoffset = hash - lafs_hash_name(seed, nlen, name);
235         buf = map_dblock(dirblk);
236         rv = lafs_dir_add_ent(buf, bits, name, nlen, 0, DT_TEST,
237                               seed, hash, chainoffset);
238         unmap_dblock(dirblk, buf);
239         if (rv < 0)
240                 return -EEXIST;
241         if (rv == 1) {
242                 doh->hash = hash;
243                 doh->chainoffset = chainoffset;
244                 return 0;
245         }
246         /* Doesn't fit, try repacking */
247
248         buf = map_dblock(dirblk);
249         dh = (struct dirheader *)buf;
250         if (dh->freepieces >= space_needed(nlen, chainoffset, bits)) {
251                 char *tmp;
252                 unmap_dblock(dirblk, buf);
253                 tmp = kmalloc(256<<bits, GFP_KERNEL);
254                 if (tmp) {
255                         buf = map_dblock(dirblk);
256                         lafs_dir_repack(buf, bits, tmp, seed, 0);
257                         if (lafs_dir_add_ent(tmp, bits, name, nlen, 0, DT_TEST,
258                                              seed, hash, chainoffset)) {
259                                 memcpy(buf, tmp, blocksize);
260                                 unmap_dblock(dirblk, buf);
261                                 kfree(tmp);
262                                 doh->hash = hash;
263                                 doh->chainoffset = chainoffset;
264                                 return 0;
265                         }
266                         kfree(tmp);
267                 } else {
268                         unmap_dblock(dirblk, buf);
269                         return -ENOMEM;
270                 }
271         }
272         unmap_dblock(dirblk, buf);
273
274         /* Really doesn't fit, need to split.
275          * We have to perform the split now so that we can choose a new
276          * index and pin that block.
277          */
278         n1 = kmalloc(blocksize, GFP_KERNEL);
279         n2 = kmalloc(blocksize, GFP_KERNEL);
280         if (!n1 || !n2) {
281                 kfree(n1); kfree(n2);
282                 return -ENOMEM;
283         }
284         doh->new = lafs_get_block(dir, newhash+1, NULL, GFP_KERNEL,
285                                   MKREF(dir_new));
286         if (doh->new == NULL) {
287                 kfree(n1); kfree(n2);
288                 return -ENOMEM;
289         }
290         buf = map_dblock(dirblk);
291         lafs_dir_split(buf, bits, n1, n2, name, inum, type,
292                        &newhash, seed, hash, chainoffset);
293         unmap_dblock(dirblk, buf);
294         buf = map_dblock(doh->new);
295         memcpy(buf, n1, blocksize);
296         unmap_dblock(doh->new, buf);
297         set_bit(B_Valid, &doh->new->b.flags);
298         set_bit(B_PinPending, &doh->new->b.flags);
299         kfree(n1);
300         doh->temp = n2;
301         return 0;
302 }
303
304 static void
305 dir_create_commit(struct dirop_handle *doh,
306                   struct fs *fs, struct inode *dir,
307                   const char *name, int nlen, u32 target, int type)
308 {
309         /* We are committed to creating this entry.
310          * Everything has been allocated and pinned.
311          * All we do is:
312          * - add name to doh->dirent_block.
313          * - if doh->index == NULL, done.
314          * - possibly update indexslot to have address of doh->dirent_block.
315          * - if doh->new, split doh->index into doh->new adding doh->previndex
316          * - else add doh->previndex into doh->index
317          */
318         char *buf;
319         int bits = fs->blocksize_bits - 8;
320         int blocksize = fs->blocksize;
321         struct lafs_inode *lai = LAFSI(dir);
322         u32 seed = lai->md.file.seed;
323
324         buf = map_dblock(doh->dirent_block);
325
326         if (doh->new) {
327                 /* We did a split, and have the block ready to go */
328                 memcpy(buf, doh->temp, blocksize);
329                 kfree(doh->temp);
330                 doh->temp = NULL;
331                 lafs_dirty_dblock(doh->new);
332                 if ((((loff_t)doh->new->b.fileaddr+1) << dir->i_blkbits)
333                     > dir->i_size) {
334                         i_size_write(dir, (((loff_t)doh->new->b.fileaddr+1)
335                                            << dir->i_blkbits));
336                         lafs_dirty_inode(dir);
337                 }
338                 clear_bit(B_PinPending, &doh->new->b.flags);
339                 putdref(doh->new, MKREF(dir_new));
340         } else
341                 lafs_dir_add_ent(buf, bits, name, nlen, target,
342                                  type, seed, doh->hash, doh->chainoffset);
343         lafs_dirty_dblock(doh->dirent_block);
344         if (dir->i_size <= blocksize) {
345                 /* Make dir fit in inode if possible */
346                 i_size_write(dir, lafs_dir_blk_size(buf, bits));
347                 lafs_dirty_inode(dir);
348         }
349         clear_bit(B_PinPending, &doh->dirent_block->b.flags);
350         unmap_dblock(doh->dirent_block, buf);
351         putdref(doh->dirent_block, MKREF(dir_blk));
352 }
353
354 static int
355 dir_create_pin(struct dirop_handle *doh)
356 {
357         int err;
358         err = lafs_pin_dblock(doh->dirent_block, NewSpace);
359         if (err || doh->new == NULL)
360                 return err;
361         err = lafs_pin_dblock(doh->new, NewSpace);
362         return err;
363 }
364
365 static void
366 dir_create_abort(struct dirop_handle *doh)
367 {
368         kfree(doh->temp);
369         if (!IS_ERR(doh->dirent_block) && doh->dirent_block) {
370                 clear_bit(B_PinPending, &doh->dirent_block->b.flags);
371                 putdref(doh->dirent_block, MKREF(dir_blk));
372         }
373         if (!IS_ERR(doh->new) && doh->new) {
374                 clear_bit(B_PinPending, &doh->new->b.flags);
375                 putdref(doh->new, MKREF(dir_new));
376         }
377 }
378
/*---------------------------------------------------------------
 * Delete directory entry.
 * Deleting involves invalidating the entry in the dirent block,
 * and then removing any deleted entries that are not in a chain.
 * If we cannot be sure, we schedule orphan processing to do
 * the fine details of chain clearing.
 */
386 static int
387 dir_delete_prepare(struct fs *fs, struct inode *dir,
388                const char *name, int nlen, struct dirop_handle *doh)
389 {
390         struct datablock *dirblk;
391         int orphan = 0;
392
393         doh->dirent_block =
394                 dirblk = dir_lookup_blk(dir, name, nlen, &doh->index,
395                                         0, &doh->hash, MKREF(dir_blk));
396         if (IS_ERR(dirblk) && PTR_ERR(dirblk) == -ENOENT) {
397                 lafs_trace = 1;
398                 dirblk = dir_lookup_blk(dir, name, nlen, &doh->index,
399                                         0, &doh->hash, MKREF(dir_blk));
400                 if (!IS_ERR(dirblk))
401                         printk("Weird: %s\n", strblk(&dirblk->b));
402                 lafs_trace = 0;
403         }
404
405         if (IS_ERR(dirblk))
406                 return PTR_ERR(dirblk);
407         lafs_iolock_written(&dirblk->b);
408         set_bit(B_PinPending, &dirblk->b.flags);
409         /* i_mutex protect us now, so don't need to maintain the lock */
410         lafs_iounlock_block(&dirblk->b);
411
412         /* Only make this block an orphan if there is a real
413          * possibilitiy.
414          * i.e. one of
415          *    We found the last possible entry
416          *    We found the first entry
417          *    We found the only entry (in which case we found the first)
418          *    First entry is deleted
419          */
420         if (((doh->hash+1) & MaxDirHash) == doh->dirent_block->b.fileaddr)
421                 /* Last possible entry is being remove */
422                 orphan=1;
423         if (!orphan) {
424                 u32 seed = LAFSI(dir)->md.file.seed;
425                 u8 firstpiece = 0;
426                 struct dir_ent de;
427                 char bits = dir->i_blkbits - 8;
428                 char *buf = map_dblock(dirblk);
429                 lafs_dir_find(buf, bits, seed, 0, &firstpiece);
430                 if (doh->index == firstpiece ||
431                     lafs_dir_extract(buf, bits, &de,
432                                      firstpiece, NULL)->target == 0)
433                         orphan = 1;
434                 unmap_dblock(dirblk, buf);
435         }
436         if (orphan)
437                 return lafs_make_orphan(fs, doh->dirent_block);
438         return 0;
439 }
440
441 static void
442 dir_delete_commit(struct dirop_handle *doh,
443                   struct fs *fs, struct inode *dir,
444                   const char *name, int nlen)
445 {
446         char *buf = map_dblock(doh->dirent_block);
447         char bits = dir->i_blkbits - 8;
448         struct dir_ent de;
449         u32 seed = LAFSI(dir)->md.file.seed;
450         u8 ignore;
451
452         /* First mark the entry as deleted, then consider removing it*/
453         de.target = 0;
454         de.type = 0;
455         lafs_dir_set_target(buf, bits, &de, doh->index);
456
457         /* If 'hash+1' is not in this block, make me an orphan
458          *   (as we cannot check the chain)
459          * If it is and exists, do nothing (could be in active chain).
460          * If it doesn't exist:
461          *    Remove this entry and any earlier deleted entries in a chain,
462          *    but don't remove the first entry in the block.
463          *    If we end up leaving that first entry, make me an orphan so
464          *    the we can check if the chain continues in a previous block.
465          */
466         if (((doh->hash+1) & MaxDirHash) == doh->dirent_block->b.fileaddr)
467                 unmap_dblock(doh->dirent_block, buf);
468         else if (lafs_dir_find(buf, bits, seed, doh->hash+1, &ignore) == 0) {
469                 /* This is the end of a chain, clean up */
470                 u8 firstpiece;
471                 u8 piece;
472                 u32 hash;
473
474                 lafs_dir_find(buf, bits, seed, 0, &firstpiece);
475                 hash = doh->hash; piece = doh->index;
476                 do {
477                         if (piece == firstpiece)
478                                 break;
479                         lafs_dir_del_ent(buf, bits, seed, hash);
480                         BUG_ON(hash == 0 || doh->hash - hash > 256);
481                         hash--;
482                 } while (lafs_dir_find(buf, bits, seed, hash, &piece) &&
483                          lafs_dir_extract(buf, bits, &de, piece,
484                                           NULL)->target == 0);
485
486                 unmap_dblock(doh->dirent_block, buf);
487         } else
488                 unmap_dblock(doh->dirent_block, buf);
489
490         lafs_dirty_dblock(doh->dirent_block);
491         clear_bit(B_PinPending, &doh->dirent_block->b.flags);
492         putdref(doh->dirent_block, MKREF(dir_blk));
493 }
494
495 static int
496 dir_delete_pin(struct dirop_handle *doh)
497 {
498         int err;
499         err = lafs_pin_dblock(doh->dirent_block, ReleaseSpace);
500         if (err)
501                 return err;
502         return 0;
503 }
504
505 static void
506 dir_delete_abort(struct dirop_handle *doh)
507 {
508         if (doh->dirent_block &&
509             !IS_ERR(doh->dirent_block)) {
510                 clear_bit(B_PinPending, &doh->dirent_block->b.flags);
511                 putdref(doh->dirent_block, MKREF(dir_blk));
512         }
513 }
514
515 /*--------------------------------------------------------------
516  * Update directory entry
517  * This is used for rename when the target already exists
518  * Rather than delete+create it becomes delete+update
519  * This is even similar to delete except that we don't bother
520  * with orphans.
521  */
522 static int
523 dir_update_prepare(struct fs *fs, struct inode *dir,
524                    const char *name, int nlen, struct dirop_handle *doh)
525 {
526         struct datablock *dirblk;
527
528         doh->dirent_block =
529                 dirblk = dir_lookup_blk(dir, name, nlen, &doh->index,
530                                         0, NULL, MKREF(dir_blk));
531         if (IS_ERR(dirblk))
532                 return PTR_ERR(dirblk);
533         lafs_iolock_written(&dirblk->b);
534         set_bit(B_PinPending, &dirblk->b.flags);
535         /* i_mutex protect us now, so don't need to maintain the lock */
536         lafs_iounlock_block(&dirblk->b);
537         return 0;
538 }
539
540 static void
541 dir_update_commit(struct fs *fs, u32 target, int type,
542                   struct dirop_handle *doh)
543 {
544         char *buf = map_dblock(doh->dirent_block);
545         int bits = doh->dirent_block->b.inode->i_blkbits - 8;
546         struct dir_ent de;
547
548         de.target = target;
549         de.type = type;
550         lafs_dir_set_target(buf, bits, &de, doh->index);
551         unmap_dblock(doh->dirent_block, buf);
552         lafs_dirty_dblock(doh->dirent_block);
553         clear_bit(B_PinPending, &doh->dirent_block->b.flags);
554         putdref(doh->dirent_block, MKREF(dir_blk));
555 }
556
557 static int
558 dir_update_pin(struct dirop_handle *doh)
559 {
560         return lafs_pin_dblock(doh->dirent_block, ReleaseSpace);
561 }
562
563 static void
564 dir_update_abort(struct dirop_handle *doh)
565 {
566         if (doh->dirent_block &&
567             !IS_ERR(doh->dirent_block)) {
568                 clear_bit(B_PinPending, &doh->dirent_block->b.flags);
569                 putdref(doh->dirent_block, MKREF(dir_blk));
570         }
571 }
572
/*------------------------------------------------------------
 * Directory operations need to be logged as transactions.
 * The transaction is formed from the name preceded by 4 bytes
 * for the inode number.
 * A handle to allow matching of related updates is stored in the
 * block number.
 * The 'type' of transaction is recorded in the offset.
 */
580
581 static int dir_log_prepare(struct update_handle *uh,
582                            struct fs *fs,
583                            struct qstr *name)
584 {
585         return lafs_cluster_update_prepare(uh, fs, name->len+4);
586 }
587
588 static void dir_log_commit(struct update_handle *uh,
589                            struct fs *fs, struct inode *dir,
590                            struct qstr *name, u32 target,
591                            int operation, u32 *handle)
592 {
593         char mb[4];
594         static u32 hancnt;
595         u32 han = 0;
596
597         switch (operation) {
598         case DIROP_LINK:
599         case DIROP_UNLINK:
600                 han = 0;
601                 break;
602         case DIROP_REN_SOURCE:
603                 while (++hancnt == 0)
604                         ;
605                 han = hancnt;
606                 *handle = han;
607                 break;
608         case DIROP_REN_TARGET:
609                 han = *handle;
610                 break;
611         default:
612                 BUG();
613         }
614
615         *(u32 *)mb = cpu_to_le32(target);
616         lafs_cluster_update_commit_buf(uh, fs, dir, han, operation,
617                                        4+name->len, mb,
618                                        name->len, name->name);
619 }
620
621 int
622 lafs_dir_roll_mini(struct inode *dir, int handle, int dirop,
623                    u32 inum, char *name, int len)
624 {
625         int err = 0;
626         struct dirop_handle doh, old_doh;
627         struct datablock *inodb = NULL, *olddb = NULL;
628         struct inode *inode = NULL;
629         struct rename_roll *rr = NULL, **rrp;
630         struct fs *fs = fs_from_inode(dir);
631         int last;
632
633         if (inum)
634                 inode = lafs_iget(LAFSI(dir)->filesys, inum, SYNC);
635         if (IS_ERR(inode))
636                 return PTR_ERR(inode);
637         if (!inode && dirop != DIROP_REN_TARGET)
638                 return -EINVAL;
639
640         switch (dirop) {
641         default:
642                 err = -EINVAL;
643                 break;
644         case DIROP_LINK:
645                 /* name doesn't exist - we create it. */
646                 err = dir_create_prepare(fs, dir, name, len,
647                                          inum, mode_to_dt(inode->i_mode), &doh);
648                 inodb = lafs_inode_dblock(dir, SYNC, MKREF(roll_dir));
649                 if (IS_ERR(inodb))
650                         err = PTR_ERR(inodb);
651
652                 err = err ?: dir_create_pin(&doh);
653                 err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
654                 if (err < 0) {
655                         dir_create_abort(&doh);
656                         break;
657                 }
658                 inode_inc_link_count(inode);
659                 lafs_inode_checkpin(inode);
660                 lafs_dirty_dblock(inodb);
661                 clear_bit(B_PinPending, &inodb->b.flags);
662                 dir_create_commit(&doh, fs, dir, name, len,
663                                   inum, mode_to_dt(inode->i_mode));
664                 err = 0;
665                 break;
666
667         case DIROP_UNLINK:
668                 /* Name exists, we need to remove it */
669                 last = (inode->i_nlink == 1);
670                 err = dir_delete_prepare(fs, dir, name, len, &doh);
671                 inodb = lafs_inode_dblock(inode, SYNC, MKREF(roll_dir));
672                 if (IS_ERR(inode))
673                         err = PTR_ERR(inodb);
674                 if (last && !err)
675                         err = lafs_make_orphan(fs, inodb);
676                 if (err) {
677                         dir_delete_abort(&doh);
678                         break;
679                 }
680                 lafs_iolock_block(&inodb->b);
681                 set_bit(B_PinPending, &inodb->b.flags);
682                 lafs_iounlock_block(&inodb->b);
683                 err = dir_delete_pin(&doh);
684                 err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
685                 if (err < 0) {
686                         dir_delete_abort(&doh);
687                         break;
688                 }
689                 inode_dec_link_count(inode);
690                 dir_delete_commit(&doh, fs, dir, name, len);
691                 lafs_inode_checkpin(inode);
692                 lafs_dirty_dblock(inodb);
693                 clear_bit(B_PinPending, &inodb->b.flags);
694                 err = 0;
695                 break;
696
697         case DIROP_REN_SOURCE:
698                 rr = kmalloc(sizeof(*rr) + len, GFP_KERNEL);
699                 if (!rr) {
700                         err = -ENOMEM;
701                         break;
702                 }
703                 rr->next = fs->pending_renames;
704                 rr->key = handle;
705                 rr->dir = dir; igrab(dir);
706                 rr->inode = inode; igrab(inode);
707                 rr->nlen = len;
708                 strncpy(rr->name, name, len);
709                 rr->name[len] = 0;
710                 fs->pending_renames = rr;
711                 rr = NULL;
712                 break;
713
714         case DIROP_REN_TARGET:
715                 rrp = &fs->pending_renames;
716                 while (*rrp) {
717                         rr = *rrp;
718                         if (rr->key == handle)
719                                 break;
720                         rrp = &rr->next;
721                 }
722                 if (!*rrp) {
723                         rr = NULL;
724                         err = -EINVAL;
725                         break;
726                 }
727                 *rrp = rr->next;
728                 rr->next = NULL;
729
730                 last = (inode && inode->i_nlink == 1);
731
732                 /* FIXME check both are dirs or non-dirs, and that a
733                  * target directory is empty */
734                 err = dir_delete_prepare(fs, rr->dir,
735                                          rr->name, rr->nlen,
736                                          &old_doh);
737                 olddb = lafs_inode_dblock(rr->inode, SYNC, MKREF(roll_dir));
738                 if (IS_ERR(olddb))
739                         err = PTR_ERR(olddb);
740                 if (inode) {
741                         /*unlink inode, update name */
742                         err = dir_update_prepare(fs, dir, name, len, &doh)
743                                 ?: err;
744                         inodb = lafs_inode_dblock(inode, SYNC, MKREF(roll_dir));
745                         if (IS_ERR(inodb))
746                                 err = PTR_ERR(inodb);
747                         if (last && !err)
748                                 err = lafs_make_orphan(fs, inodb);
749                         lafs_iolock_block(&inodb->b);
750                         set_bit(B_PinPending, &inodb->b.flags);
751                         lafs_iounlock_block(&inodb->b);
752                 } else
753                         /* create new link */
754                         err = dir_create_prepare(fs, dir, name, len,
755                                                  rr->inode->i_ino,
756                                                  mode_to_dt(rr->inode->i_mode),
757                                                  &doh) ?: err;
758
759                 if (!err) {
760                         lafs_iolock_block(&olddb->b);
761                         set_bit(B_PinPending, &olddb->b.flags);
762                         lafs_iounlock_block(&olddb->b);
763                 }
764
765                 err = err ?: dir_delete_pin(&old_doh);
766                 err = err ?: lafs_pin_dblock(olddb, ReleaseSpace);
767                 if (inode) {
768                         err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
769                         err = err ?: dir_update_pin(&doh);
770                 } else
771                         err = err ?: dir_create_pin(&doh);
772                 if (err < 0) {
773                         dir_delete_abort(&old_doh);
774                         if (inode)
775                                 dir_update_abort(&doh);
776                         else
777                                 dir_create_abort(&doh);
778                         break;
779                 }
780                 dir_delete_commit(&old_doh, fs, rr->dir, rr->name, rr->nlen);
781                 if (S_ISDIR(rr->inode->i_mode)) {
782                         inode_dec_link_count(rr->dir);
783                         if (!inode)
784                                 inode_inc_link_count(dir);
785                 }
786                 if (inode)
787                         dir_update_commit(fs, rr->inode->i_ino,
788                                           mode_to_dt(rr->inode->i_mode),
789                                           &doh);
790                 else
791                         dir_create_commit(&doh, fs, dir, name, len,
792                                           rr->inode->i_ino,
793                                           mode_to_dt(rr->inode->i_mode));
794                 switch (LAFSI(rr->inode)->type) {
795                 case TypeFile:
796                         LAFSI(rr->inode)->md.file.parent = dir->i_ino;
797                         break;
798                 case TypeInodeFile:
799                         LAFSI(rr->inode)->md.fs.parent = dir->i_ino;
800                         break;
801                 }
802                 if (inode) {
803                         if (S_ISDIR(inode->i_mode))
804                                 inode_dec_link_count(inode);
805                         inode_dec_link_count(inode);
806                         lafs_inode_checkpin(inode);
807                 }
808                 lafs_dirty_inode(rr->inode);
809                 lafs_inode_checkpin(rr->dir);
810                 lafs_inode_checkpin(dir);
811                 clear_bit(B_PinPending, &olddb->b.flags);
812                 if (inode) {
813                         clear_bit(B_PinPending, &inodb->b.flags);
814                         putdref(inodb, MKREF(dir_roll));
815                 }
816                 err = 0;
817                 break;
818         }
819         if (inode && !IS_ERR(inode))
820                 iput(inode);
821         if (inodb && !IS_ERR(inodb))
822                 putdref(inodb, MKREF(roll_dir));
823         if (olddb && !IS_ERR(olddb))
824                 putdref(olddb, MKREF(roll_dir));
825         if (rr) {
826                 iput(rr->dir);
827                 iput(rr->inode);
828                 kfree(rr);
829         }
830         return err;
831 }
/*------------------------------------------------------------
 * Now that we have the low-level operations in place, we
 * can implement the VFS interface.
 */
836 static int
837 lafs_create(struct inode *dir, struct dentry *de, int mode,
838      struct nameidata *nd)
839 {
840 /* Need to allocate an inode and space in the directory */
841         struct fs *fs = fs_from_inode(dir);
842         struct datablock *db;
843         struct inode *ino = lafs_new_inode(fs, LAFSI(dir)->filesys,
844                                            dir, TypeFile, 0, mode, &db);
845         struct dirop_handle doh;
846         struct update_handle uh;
847         int err;
848
849         if (IS_ERR(ino))
850                 return PTR_ERR(ino);
851
852         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
853                                  ino->i_ino, DT_REG, &doh);
854
855         dprintk("ERR = %d\n", err);
856         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
857         dprintk("ERR2 = %d\n", err);
858         if (err)
859                 goto abort;
860
861 retry:
862         dprintk("lc: dirblk = %p\n", doh.dirent_block);
863         lafs_checkpoint_lock(fs);
864
865         err = dir_create_pin(&doh);
866         err = err ?: lafs_cluster_update_pin(&uh);
867         err = err ?: lafs_pin_dblock(db, NewSpace);
868         if (err == -EAGAIN) {
869                 lafs_checkpoint_unlock_wait(fs);
870                 goto retry;
871         }
872         if (err < 0)
873                 goto abort_unlock;
874
875         ino->i_nlink = 1;
876         lafs_add_orphan(fs, db);
877         LAFSI(ino)->md.file.parent = dir->i_ino;
878         lafs_dirty_inode(ino);
879         lafs_inode_checkpin(ino);
880         dir_log_commit(&uh, fs, dir, &de->d_name, ino->i_ino, DIROP_LINK, NULL);
881         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
882                           ino->i_ino, DT_REG);
883         lafs_checkpoint_unlock(fs);
884         d_instantiate(de, ino);
885         clear_bit(B_PinPending, &db->b.flags);
886         putdref(db, MKREF(inode_new));
887         return 0;
888
889 abort_unlock:
890         lafs_checkpoint_unlock(fs);
891 abort:
892         lafs_cluster_update_abort(&uh);
893         dir_create_abort(&doh);
894         iput(ino);
895         clear_bit(B_PinPending, &db->b.flags);
896         putdref(db, MKREF(inode_new));
897         return err;
898 }
899
900 static int
901 lafs_link(struct dentry *from, struct inode *dir, struct dentry *to)
902 {
903         /* Create the new name and increase the link count on the target */
904         struct fs *fs = fs_from_inode(dir);
905         struct dirop_handle doh;
906         struct update_handle uh;
907         struct inode *inode = from->d_inode;
908         struct datablock *inodb;
909         int err;
910
911         if (inode->i_nlink >= LAFS_MAX_LINKS)
912                 return -EMLINK;
913         err = dir_create_prepare(fs, dir, to->d_name.name, to->d_name.len,
914                                  inode->i_ino, mode_to_dt(inode->i_mode),
915                                  &doh);
916         err = dir_log_prepare(&uh, fs, &to->d_name) ?: err;
917
918         inodb = lafs_inode_dblock(dir, SYNC, MKREF(link));
919         if (IS_ERR(inodb))
920                 err = PTR_ERR(inodb);
921         if (err)
922                 goto abort;
923 retry:
924         lafs_checkpoint_lock(fs);
925
926         err = dir_create_pin(&doh);
927         err = err ?: lafs_cluster_update_pin(&uh);
928         err = err ?: lafs_pin_dblock(inodb, NewSpace);
929         if (err == -EAGAIN) {
930                 lafs_checkpoint_unlock_wait(fs);
931                 goto retry;
932         }
933         if (err < 0)
934                 goto abort_unlock;
935
936         inode_inc_link_count(inode);
937         lafs_inode_checkpin(inode);
938         lafs_dirty_dblock(inodb);
939         clear_bit(B_PinPending, &inodb->b.flags);
940         putdref(inodb, MKREF(link));
941
942         dir_log_commit(&uh, fs, dir, &to->d_name, inode->i_ino,
943                        DIROP_LINK, NULL);
944         dir_create_commit(&doh, fs, dir, to->d_name.name, to->d_name.len,
945                           inode->i_ino, mode_to_dt(inode->i_mode));
946         /* Don't log the nlink change - that is implied in the name creation */
947         d_instantiate(to, inode);
948
949         lafs_checkpoint_unlock(fs);
950         return 0;
951 abort_unlock:
952         lafs_checkpoint_unlock(fs);
953         clear_bit(B_PinPending, &inodb->b.flags);
954 abort:
955         if (!IS_ERR(inodb))
956                 putdref(inodb, MKREF(link));
957         dir_create_abort(&doh);
958         lafs_cluster_update_abort(&uh);
959         return err;
960 }
961
962 static int
963 lafs_unlink(struct inode *dir, struct dentry *de)
964 {
965         struct fs *fs = fs_from_inode(dir);
966         struct inode *inode = de->d_inode;
967         int last = (inode->i_nlink == 1);
968         struct dirop_handle doh;
969         struct update_handle uh;
970         struct datablock *inodb;
971         int err;
972
973         dprintk("unlink %s\n", de->d_name.name);
974
975         err = dir_delete_prepare(fs, dir, de->d_name.name, de->d_name.len,
976                                  &doh);
977         BUG_ON(err == -ENOENT);
978         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
979         inodb = lafs_inode_dblock(inode, SYNC, MKREF(inode_update));
980         if (IS_ERR(inodb))
981                 err = PTR_ERR(inodb);
982         if (last && !err)
983                 err = lafs_make_orphan(fs, inodb);
984         if (err)
985                 goto abort;
986         lafs_iolock_block(&inodb->b);
987         set_bit(B_PinPending, &inodb->b.flags);
988         lafs_iounlock_block(&inodb->b);
989 retry:
990         lafs_checkpoint_lock(fs);
991
992         err = dir_delete_pin(&doh);
993         err = err ?: lafs_cluster_update_pin(&uh);
994         err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
995         if (err == -EAGAIN) {
996                 lafs_checkpoint_unlock_wait(fs);
997                 goto retry;
998         }
999         if (err < 0)
1000                 goto abort_unlock;
1001
1002         inode_dec_link_count(inode);
1003         dir_log_commit(&uh, fs, dir, &de->d_name, inode->i_ino,
1004                        DIROP_UNLINK, NULL);
1005         dir_delete_commit(&doh, fs, dir, de->d_name.name, de->d_name.len);
1006         lafs_checkpoint_unlock(fs);
1007         lafs_inode_checkpin(inode);
1008         lafs_dirty_dblock(inodb);
1009         clear_bit(B_PinPending, &inodb->b.flags);
1010         putdref(inodb, MKREF(inode_update));
1011         return 0;
1012 abort_unlock:
1013         clear_bit(B_PinPending, &inodb->b.flags);
1014         lafs_checkpoint_unlock(fs);
1015 abort:
1016         if (!IS_ERR(inodb))
1017                 putdref(inodb, MKREF(inode_update));
1018         lafs_cluster_update_abort(&uh);
1019         dir_delete_abort(&doh);
1020         return err;
1021 }
1022
1023 static void dir_flush_orphans(struct fs *fs, struct inode *inode)
1024 {
1025         /*
1026          * Orphans cannot clear while we hold i_mutex, so
1027          * we have to run them ourselves.
1028          */
1029         struct datablock *db;
1030         DEFINE_WAIT(wq);
1031         while ((db = lafs_find_orphan(inode))) {
1032                 int still_orphan;
1033                 prepare_to_wait(&fs->async_complete, &wq,
1034                                 TASK_UNINTERRUPTIBLE);
1035                 getdref(db, MKREF(rmdir_orphan));
1036                 lafs_dir_handle_orphan(db);
1037                 still_orphan = test_bit(B_Orphan, &db->b.flags);
1038                 putdref(db, MKREF(rmdir_orphan));
1039                 if (still_orphan)
1040                         /* still an orphan, need to wait */
1041                         schedule();
1042         }
1043         finish_wait(&fs->async_complete, &wq);
1044 }
1045
1046 static int
1047 lafs_rmdir(struct inode *dir, struct dentry *de)
1048 {
1049         struct fs *fs = fs_from_inode(dir);
1050         struct inode *inode = de->d_inode;
1051         struct dirop_handle doh;
1052         struct update_handle uh;
1053         struct datablock *inodb;
1054         int err;
1055
1056         if (inode->i_nlink > 2)
1057                 return -ENOTEMPTY;
1058         if (inode->i_size) {
1059                 /* Probably not empty, but it could be that we
1060                  * just need to wait for orphans the clear.
1061                  */
1062                 dir_flush_orphans(fs, inode);
1063                 if (inode->i_size)
1064                         return -ENOTEMPTY;
1065         }
1066
1067         dprintk("rmdir %s\n", de->d_name.name);
1068
1069         err = dir_delete_prepare(fs, dir, de->d_name.name, de->d_name.len,
1070                                  &doh);
1071         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1072         inodb = lafs_inode_dblock(inode, SYNC, MKREF(inode_update));
1073         if (IS_ERR(inodb))
1074                 err = PTR_ERR(inodb);
1075         if (!err)
1076                 err = lafs_make_orphan(fs, inodb);
1077         if (err)
1078                 goto abort;
1079         lafs_iolock_block(&inodb->b);
1080         set_bit(B_PinPending, &inodb->b.flags);
1081         lafs_iounlock_block(&inodb->b);
1082 retry:
1083         lafs_checkpoint_lock(fs);
1084
1085         err = dir_delete_pin(&doh);
1086         err = err ?: lafs_cluster_update_pin(&uh);
1087         err = err ?: lafs_pin_dblock(inodb, ReleaseSpace);
1088         if (err == -EAGAIN) {
1089                 lafs_checkpoint_unlock_wait(fs);
1090                 goto retry;
1091         }
1092         if (err < 0)
1093                 goto abort_unlock;
1094
1095         inode_dec_link_count(dir);
1096         inode_dec_link_count(inode); /* . */
1097         inode_dec_link_count(inode); /* .. */
1098         dir_log_commit(&uh, fs, dir, &de->d_name, inode->i_ino,
1099                        DIROP_UNLINK, NULL);
1100         dir_delete_commit(&doh, fs, dir, de->d_name.name, de->d_name.len);
1101         lafs_dirty_dblock(inodb);
1102         clear_bit(B_PinPending, &inodb->b.flags);
1103         putdref(inodb, MKREF(inode_update));
1104         lafs_inode_checkpin(inode);
1105         lafs_inode_checkpin(dir);
1106         lafs_checkpoint_unlock(fs);
1107         return 0;
1108 abort_unlock:
1109         lafs_checkpoint_unlock(fs);
1110         clear_bit(B_PinPending, &inodb->b.flags);
1111 abort:
1112         if (!IS_ERR(inodb))
1113                 putdref(inodb, MKREF(inode_update));
1114         lafs_cluster_update_abort(&uh);
1115         dir_delete_abort(&doh);
1116         return err;
1117 }
1118
1119 static int
1120 lafs_symlink(struct inode *dir, struct dentry *de,
1121              const char *symlink)
1122 {
1123         int l;
1124         struct inode *ino;
1125         struct fs *fs = fs_from_inode(dir);
1126         struct datablock *b, *inodb;
1127         struct dirop_handle doh;
1128         struct update_handle uh;
1129         char *buf;
1130         int err;
1131
1132         l = strlen(symlink);
1133         if (l > fs->blocksize-1)
1134                 return -ENAMETOOLONG;
1135
1136         ino = lafs_new_inode(fs, LAFSI(dir)->filesys, dir,
1137                              TypeSymlink, 0, 0666, &inodb);
1138         if (IS_ERR(ino))
1139                 return PTR_ERR(ino);
1140         b = lafs_get_block(ino, 0, NULL, GFP_KERNEL, MKREF(symlink));
1141         if (!b) {
1142                 putdref(inodb, MKREF(inode_new));
1143                 iput(ino);
1144                 return -ENOMEM;
1145         }
1146         set_bit(B_PinPending, &b->b.flags);
1147
1148         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
1149                                  ino->i_ino, DT_LNK, &doh);
1150         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1151         if (err)
1152                 goto abort;
1153 retry:
1154         lafs_checkpoint_lock(fs);
1155
1156         err = dir_create_pin(&doh);
1157         err = err ?: lafs_pin_dblock(b, NewSpace);
1158         err = err ?: lafs_cluster_update_pin(&uh);
1159         if (err == -EAGAIN) {
1160                 lafs_checkpoint_unlock_wait(fs);
1161                 goto retry;
1162         }
1163         if (err < 0)
1164                 goto abort_unlock;
1165
1166         ino->i_nlink = 1;
1167         LAFSI(ino)->md.file.parent = dir->i_ino;
1168         lafs_add_orphan(fs, inodb);
1169
1170         lafs_iolock_block(&b->b);
1171         buf = map_dblock(b);
1172         memcpy(buf, symlink, l);
1173         buf[l] = 0;
1174         unmap_dblock(b, buf);
1175         set_bit(B_Valid, &b->b.flags);
1176         lafs_dirty_dblock(b);
1177         clear_bit(B_PinPending, &b->b.flags);
1178         lafs_cluster_allocate(&b->b, 0); /* Content will go in the next cluster - almost like
1179                                           * an update */
1180         putdref(b, MKREF(symlink));
1181         i_size_write(ino, l);
1182         lafs_dirty_inode(ino);
1183         lafs_inode_checkpin(ino);
1184
1185         dir_log_commit(&uh, fs, dir, &de->d_name, ino->i_ino, DIROP_LINK, NULL);
1186         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
1187                           ino->i_ino, DT_LNK);
1188         lafs_checkpoint_unlock(fs);
1189         d_instantiate(de, ino);
1190         putdref(inodb, MKREF(inode_new));
1191         return 0;
1192 abort_unlock:
1193         lafs_checkpoint_unlock(fs);
1194 abort:
1195         putdref(inodb, MKREF(inode_new));
1196         clear_bit(B_PinPending, &b->b.flags);
1197         putdref(b, MKREF(symlink));
1198         dir_create_abort(&doh);
1199         lafs_cluster_update_abort(&uh);
1200         iput(ino);
1201         return err;
1202 }
1203
1204 static int
1205 lafs_mkdir(struct inode *dir, struct dentry *de, int mode)
1206 {
1207         struct inode *ino;
1208         struct lafs_inode *lai;
1209         struct fs *fs = fs_from_inode(dir);
1210         int err;
1211         struct dirop_handle doh;
1212         struct update_handle uh;
1213         struct datablock *inodb;
1214
1215         if (dir->i_nlink >= LAFS_MAX_LINKS)
1216                 return -EMLINK;
1217
1218         ino = lafs_new_inode(fs, LAFSI(dir)->filesys, dir,
1219                              TypeDir, 0, mode, &inodb);
1220         if (IS_ERR(ino))
1221                 return PTR_ERR(ino);
1222
1223         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
1224                                  ino->i_ino, DT_DIR, &doh);
1225         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1226         if (err)
1227                 goto abort;
1228 retry:
1229         lafs_checkpoint_lock(fs);
1230
1231         err = dir_create_pin(&doh);
1232         err = err ?: lafs_pin_dblock(inodb, NewSpace);
1233         err = err ?: lafs_cluster_update_pin(&uh);
1234         if (err == -EAGAIN) {
1235                 lafs_checkpoint_unlock_wait(fs);
1236                 goto retry;
1237         }
1238         if (err < 0)
1239                 goto abort_unlock;
1240
1241         lai = LAFSI(ino);
1242         lai->md.file.parent = dir->i_ino;
1243         inode_inc_link_count(dir);
1244         ino->i_nlink = 2; /* From parent, and from '.' */
1245         lafs_dirty_inode(ino);
1246         lafs_inode_checkpin(dir);
1247         lafs_inode_checkpin(ino);
1248         lafs_add_orphan(fs, inodb);
1249         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
1250                           ino->i_ino, DT_DIR);
1251         d_instantiate(de, ino);
1252         clear_bit(B_PinPending, &inodb->b.flags);
1253         putdref(inodb, MKREF(inode_new));
1254         lafs_checkpoint_unlock(fs);
1255         return 0;
1256 abort_unlock:
1257         lafs_checkpoint_unlock(fs);
1258 abort:
1259         dir_create_abort(&doh);
1260         lafs_cluster_update_abort(&uh);
1261         iput(ino);
1262         clear_bit(B_PinPending, &inodb->b.flags);
1263         putdref(inodb, MKREF(inode_new));
1264         return err;
1265 }
1266
1267 static int
1268 lafs_mknod(struct inode *dir, struct dentry *de, int mode,
1269            dev_t rdev)
1270 {
1271         struct inode *ino;
1272         struct fs *fs = fs_from_inode(dir);
1273         int err;
1274         struct dirop_handle doh;
1275         struct update_handle uh;
1276         struct datablock *inodb;
1277         int type;
1278
1279         if (!new_valid_dev(rdev))
1280                 return -EINVAL;
1281
1282         type = TypeSpecial;
1283         switch (mode & S_IFMT) {
1284         case S_IFREG:
1285                 type = TypeFile;
1286                 break;
1287         case S_IFCHR:
1288         case S_IFBLK:
1289         case S_IFIFO:
1290         case S_IFSOCK:
1291                 break;
1292         default:
1293                 return -EINVAL;
1294         }
1295         ino = lafs_new_inode(fs, LAFSI(dir)->filesys, dir,
1296                              type, 0, mode, &inodb);
1297         if (IS_ERR(ino))
1298                 return PTR_ERR(ino);
1299         init_special_inode(ino, ino->i_mode, rdev);
1300
1301         err = dir_create_prepare(fs, dir, de->d_name.name, de->d_name.len,
1302                                  ino->i_ino, type, &doh);
1303         err = dir_log_prepare(&uh, fs, &de->d_name) ?: err;
1304         if (err)
1305                 goto abort;
1306 retry:
1307         lafs_checkpoint_lock(fs);
1308
1309         err = dir_create_pin(&doh);
1310         err = err ?: lafs_pin_dblock(inodb, NewSpace);
1311         err = err ?: lafs_cluster_update_pin(&uh);
1312         if (err == -EAGAIN) {
1313                 lafs_checkpoint_unlock_wait(fs);
1314                 goto retry;
1315         }
1316         if (err < 0)
1317                 goto abort_unlock;
1318
1319         LAFSI(ino)->md.file.parent = dir->i_ino;
1320         ino->i_nlink = 1;
1321         lafs_add_orphan(fs, inodb);
1322         lafs_dirty_inode(ino);
1323         lafs_inode_checkpin(ino);
1324
1325         dir_log_commit(&uh, fs, dir, &de->d_name, ino->i_ino, DIROP_LINK, NULL);
1326         dir_create_commit(&doh, fs, dir, de->d_name.name, de->d_name.len,
1327                           ino->i_ino, type);
1328         lafs_checkpoint_unlock(fs);
1329         d_instantiate(de, ino);
1330         clear_bit(B_PinPending, &inodb->b.flags);
1331         putdref(inodb, MKREF(inode_new));
1332         return 0;
1333 abort_unlock:
1334         lafs_checkpoint_unlock(fs);
1335 abort:
1336         dir_create_abort(&doh);
1337         lafs_cluster_update_abort(&uh);
1338         iput(ino);
1339         clear_bit(B_PinPending, &inodb->b.flags);
1340         putdref(inodb, MKREF(inode_new));
1341         return err;
1342 }
1343
1344 static int
1345 lafs_rename(struct inode *old_dir, struct dentry *old_dentry,
1346             struct inode *new_dir, struct dentry *new_dentry)
1347 {
1348         /* VFS has checked that this is file->file or dir->dir or
1349          * something->nothing.
1350          * We just need to check that if the target is a directory,
1351          * it is empty, then perform the rename
1352          */
1353         struct fs *fs = fs_from_inode(old_dir);
1354
1355         struct inode *old_inode = old_dentry->d_inode;
1356         struct inode *new_inode = new_dentry->d_inode;
1357
1358         struct datablock *olddb, *newdb = newdb;
1359
1360         struct dirop_handle old_doh, new_doh;
1361         struct update_handle old_uh, new_uh;
1362         int last = (new_inode && new_inode->i_nlink == 1);
1363         u32 renhandle;
1364         int err;
1365
1366         if (S_ISDIR(old_inode->i_mode)) {
1367                 if (new_inode) {
1368                         if (new_inode->i_size) {
1369                                 dir_flush_orphans(fs, new_inode);
1370                                 if (new_inode->i_size)
1371                                         return -ENOTEMPTY;
1372                         }
1373                 } else if (new_dir != old_dir) {
1374                         /* New dir is getting a new link */
1375                         if (new_dir->i_nlink >= LAFS_MAX_LINKS)
1376                                 return -EMLINK;
1377                 }
1378         }
1379         dprintk("rename %s %s\n", old_dentry->d_name.name,
1380                 new_dentry->d_name.name);
1381
1382         /* old entry gets deleted, new entry gets created or updated. */
1383         err = dir_delete_prepare(fs, old_dir,
1384                                  old_dentry->d_name.name,
1385                                  old_dentry->d_name.len,
1386                                  &old_doh);
1387         err = dir_log_prepare(&old_uh, fs, &old_dentry->d_name) ?: err;
1388         err = dir_log_prepare(&new_uh, fs, &new_dentry->d_name) ?: err;
1389         olddb = lafs_inode_dblock(old_inode, SYNC, MKREF(inode_update));
1390         if (IS_ERR(olddb))
1391                 err = PTR_ERR(olddb);
1392
1393         if (new_inode) {
1394                 /* unlink object, update name */
1395                 err = dir_update_prepare(fs, new_dir,
1396                                          new_dentry->d_name.name,
1397                                          new_dentry->d_name.len,
1398                                          &new_doh) ?: err;
1399                 newdb = lafs_inode_dblock(new_inode, SYNC, MKREF(inode_update));
1400                 if (IS_ERR(newdb))
1401                         err = PTR_ERR(newdb);
1402                 if (last && !err)
1403                         err = lafs_make_orphan(fs, newdb);
1404                 lafs_iolock_block(&newdb->b);
1405                 set_bit(B_PinPending, &newdb->b.flags);
1406                 lafs_iounlock_block(&newdb->b);
1407         } else
1408                 /* create new link */
1409                 err = dir_create_prepare(fs, new_dir,
1410                                          new_dentry->d_name.name,
1411                                          new_dentry->d_name.len,
1412                                          old_inode->i_ino,
1413                                          mode_to_dt(old_inode->i_mode),
1414                                          &new_doh) ?: err;
1415
1416         if (err)
1417                 goto abort;
1418
1419         lafs_iolock_block(&olddb->b);
1420         set_bit(B_PinPending, &olddb->b.flags);
1421         lafs_iounlock_block(&olddb->b);
1422 retry:
1423         lafs_checkpoint_lock(fs);
1424
1425         err = dir_delete_pin(&old_doh);
1426         err = err ?: lafs_cluster_update_pin(&old_uh);
1427         err = err ?: lafs_cluster_update_pin(&new_uh);
1428         err = err ?: lafs_pin_dblock(olddb, ReleaseSpace);
1429         if (new_inode) {
1430                 err = err ?: lafs_pin_dblock(newdb, NewSpace);
1431                 err = err ?: dir_update_pin(&new_doh);
1432         } else
1433                 err = err ?: dir_create_pin(&new_doh);
1434
1435         if (err == -EAGAIN) {
1436                 lafs_checkpoint_unlock_wait(fs);
1437                 goto retry;
1438         }
1439         if (err < 0)
1440                 goto abort_unlock;
1441
1442         /* OK, let's do the deed */
1443         dir_delete_commit(&old_doh, fs, old_dir, old_dentry->d_name.name,
1444                           old_dentry->d_name.len);
1445         dir_log_commit(&old_uh, fs, old_dir, &old_dentry->d_name,
1446                        old_inode->i_ino, DIROP_REN_SOURCE, &renhandle);
1447         dir_log_commit(&new_uh, fs, new_dir, &new_dentry->d_name,
1448                        new_inode ? new_inode->i_ino : 0,
1449                        DIROP_REN_TARGET,
1450                        &renhandle);
1451         if (S_ISDIR(old_inode->i_mode)) {
1452                 inode_dec_link_count(old_dir);
1453                 if (!new_inode)
1454                         inode_inc_link_count(new_dir);
1455         }
1456         if (new_inode)
1457                 dir_update_commit(fs, old_inode->i_ino,
1458                                   mode_to_dt(old_inode->i_mode),
1459                                   &new_doh);
1460         else
1461                 dir_create_commit(&new_doh, fs, new_dir,
1462                                   new_dentry->d_name.name,
1463                                   new_dentry->d_name.len,
1464                                   old_inode->i_ino,
1465                                   mode_to_dt(old_inode->i_mode));
1466         switch (LAFSI(old_inode)->type) {
1467         case TypeFile:
1468                 LAFSI(old_inode)->md.file.parent = new_dir->i_ino;
1469                 break;
1470         case TypeInodeFile:
1471                 LAFSI(old_inode)->md.fs.parent = new_dir->i_ino;
1472                 break;
1473         }
1474         if (new_inode) {
1475                 if (S_ISDIR(new_inode->i_mode))
1476                         inode_dec_link_count(new_inode);
1477                 inode_dec_link_count(new_inode);
1478                 lafs_inode_checkpin(new_inode);
1479         }
1480         lafs_dirty_inode(old_inode);
1481         lafs_inode_checkpin(new_dir);
1482         lafs_inode_checkpin(old_dir);
1483         clear_bit(B_PinPending, &olddb->b.flags);
1484         putdref(olddb, MKREF(inode_update));
1485         if (new_inode) {
1486                 clear_bit(B_PinPending, &newdb->b.flags);
1487                 putdref(newdb, MKREF(inode_new));
1488         }
1489
1490         lafs_checkpoint_unlock(fs);
1491         return 0;
1492
1493 abort_unlock:
1494         lafs_checkpoint_unlock(fs);
1495         clear_bit(B_PinPending, &olddb->b.flags);
1496         if (new_inode)
1497                 clear_bit(B_PinPending, &newdb->b.flags);
1498 abort:
1499         dir_delete_abort(&old_doh);
1500         lafs_cluster_update_abort(&old_uh);
1501         lafs_cluster_update_abort(&new_uh);
1502         if (!IS_ERR(olddb))
1503                 putdref(olddb, MKREF(inode_update));
1504         if (new_inode) {
1505                 dir_update_abort(&new_doh);
1506                 if (!IS_ERR(newdb))
1507                         putdref(newdb, MKREF(inode_new));
1508         } else
1509                 dir_create_abort(&new_doh);
1510         return err;
1511 }
1512
/*--------------------------------------------------------------------
 * Directory Orphan handling.
 *
 * Blocks in a directory file that are 'orphans' have recently had a deletion
 * and may need:
 *   - to be punched as a hole if empty
 *   - to have 'deleted' entries purged if they are freeable
 *   - to schedule next block for orphan handling if that might be appropriate.
 *
 *
 * Specifically:
 *  Lock the directory.
 *   If last possible entry (addr-1) is deleted,
 *    look for next entry.
 *    If it doesn't exist, remove last entry and preceding deleted
 *     entries, just like with delete.
 *   If first block is 'deleted' and next is removed,
 *     remove that deleted entry.
 *     look for previous entry.
 *     if it is deleted, schedule orphan handling.
 *   If ->root is 0, punch a hole
 */
1535
1536 int lafs_dir_handle_orphan(struct datablock *b)
1537 {
1538         struct inode *dir = b->b.inode;
1539         struct fs *fs = fs_from_inode(dir);
1540         int bits = dir->i_blkbits-8;
1541         u32 seed = LAFSI(dir)->md.file.seed;
1542         u32 hash;
1543         char *buf, *buf2;
1544         struct datablock *b2 = NULL;
1545         u8 piece, firstpiece;
1546         struct dir_ent de;
1547         int err = 0;
1548
1549         dprintk("HANDLE ORPHAN h=%x %s\n", (unsigned)hash, strblk(&b->b));
1550
1551         if (!lafs_iolock_written_async(&b->b))
1552                 return -EAGAIN;
1553         set_bit(B_PinPending, &b->b.flags);
1554         lafs_iounlock_block(&b->b);
1555
1556         lafs_checkpoint_lock(fs);
1557
1558         if (!test_bit(B_Valid, &b->b.flags)) {
1559                 /* probably have already erased this block,
1560                  * but the orphan_release failed due to
1561                  * space being tight.
1562                  * just try again
1563                  */
1564                 lafs_orphan_release(fs, b);
1565                 err = 0;
1566                 goto abort;
1567         }
1568
1569         /* First test:  Does a chain of deleted entries extend beyond
1570          * the end of this block.  i.e. is the last entry deleted.
1571          * If so, look at the next block and see if the chain is still
1572          * anchored, or if it can all be released.
1573          */
1574         buf = map_dblock(b);
1575         hash = (b->b.fileaddr-1) & MaxDirHash;
1576         if (lafs_dir_find(buf, bits, seed, hash, &piece) &&
1577             lafs_dir_extract(buf, bits, &de, piece, NULL)->target == 0) {
1578                 loff_t bnum;
1579                 unmap_dblock(b, buf);
1580                 bnum = b->b.fileaddr + 1;
1581                 if (lafs_find_next(dir, &bnum) == 0)
1582                         /* FIXME what if it returns an error */
1583                         bnum = 0;
1584
1585                 b2 = lafs_get_block(dir, bnum, NULL, GFP_KERNEL,
1586                                     MKREF(dir_orphan));
1587                 err = -ENOMEM;
1588                 if (!b2)
1589                         goto abort;
1590                 err = lafs_read_block_async(b2);
1591                 if (err)
1592                         goto abort;
1593
1594                 buf2 = map_dblock(b2);
1595                 if (lafs_dir_find(buf2, bits, seed, hash+1, &piece) == 0) {
1596                         u8 firstpiece;
1597                         /* We can remove that last entry, and maybe others */
1598                         unmap_dblock(b2, buf2);
1599
1600                         err = lafs_pin_dblock(b, ReleaseSpace);
1601                         if (err)
1602                                 goto abort;
1603                         buf = map_dblock(b);
1604                         lafs_dir_find(buf, bits, seed, 0, &firstpiece);
1605                         do {
1606                                 if (piece == firstpiece)
1607                                         break;
1608                                 lafs_dir_del_ent(buf, bits, seed, hash);
1609                                 hash--;
1610                         } while (lafs_dir_find(buf, bits, seed, hash, &piece) &&
1611                                  lafs_dir_extract(buf, bits, &de, piece,
1612                                                   NULL)->target == 0);
1613                         unmap_dblock(b, buf);
1614                         lafs_dirty_dblock(b);
1615                 } else
1616                         unmap_dblock(b2, buf2);
1617                 buf = map_dblock(b);
1618                 putdref(b2, MKREF(dir_orphan));
1619                 b2 = NULL;
1620         }
1621
1622         /* Second test:  if we have an unanchored chain at the start
1623          * of the block, then schedule orphan handling for previous block,
1624          * and remove the unanchor.
1625          */
1626         lafs_dir_find(buf, bits, seed, 0, &firstpiece);
1627         hash = seed;
1628         if (firstpiece &&
1629             lafs_dir_extract(buf, bits, &de, firstpiece, &hash)->target == 0 &&
1630             lafs_dir_find(buf, bits, seed, hash+1, &piece) == 0) {
1631                 unmap_dblock(b, buf);
1632                 b2 = lafs_get_block(dir, hash, NULL, GFP_KERNEL,
1633                                     MKREF(dir_orphan));
1634                 err = -ENOMEM;
1635                 if (!b2)
1636                         goto abort;
1637                 err = lafs_read_block_async(b2);
1638                 if (err)
1639                         goto abort;
1640
1641                 buf2 = map_dblock(b2);
1642                 if (lafs_dir_find(buf2, bits, seed, (hash-1) & MaxDirHash,
1643                                   &piece) &&
1644                     lafs_dir_extract(buf2, bits, &de, piece, NULL)->target == 0)
1645                         err = lafs_make_orphan_nb(fs, b2);
1646                 unmap_dblock(b2, buf2);
1647                 putdref(b2, MKREF(dir_orphan));
1648                 b2 = NULL;
1649                 if (err)
1650                         goto abort;
1651                 err = lafs_pin_dblock(b, ReleaseSpace);
1652                 if (err)
1653                         goto abort;
1654                 buf = map_dblock(b);
1655                 lafs_dir_del_ent(buf, bits, seed, hash);
1656                 lafs_dirty_dblock(b);
1657         }
1658
1659         if (lafs_dir_empty(buf)) {
1660                 loff_t bnum;
1661                 unmap_dblock(b, buf);
1662
1663                 err = lafs_pin_dblock(b, ReleaseSpace);
1664                 if (err)
1665                         goto abort;
1666
1667                 bnum = 1;
1668                 err = lafs_find_next(dir, &bnum);
1669                 if (err < 0)
1670                         goto abort;
1671                 if (err == 0) {
1672                         if (b->b.fileaddr == 0)
1673                                 i_size_write(dir, 0);
1674                         else {
1675                                 b2 = lafs_get_block(dir, 0, NULL, GFP_KERNEL,
1676                                                     MKREF(dir_orphan));
1677                                 err = -ENOMEM;
1678                                 if (!b2)
1679                                         goto abort;
1680                                 err = lafs_read_block_async(b2);
1681                                 if (err)
1682                                         goto abort;
1683                                 buf2 = map_dblock(b2);
1684                                 i_size_write(dir,
1685                                              lafs_dir_blk_size(buf2, bits));
1686                                 unmap_dblock(b2, buf2);
1687                                 putdref(b2, MKREF(dir_orphan));
1688                                 b2 = NULL;
1689                         }
1690                         lafs_dirty_inode(dir);
1691                 }
1692                 lafs_erase_dblock(b);
1693         } else
1694                 unmap_dblock(b, buf);
1695
1696         lafs_orphan_release(fs, b);
1697         err = 0;
1698
1699 abort:
1700         clear_bit(B_PinPending, &b->b.flags);
1701         putdref(b2, MKREF(dir_orphan));
1702         lafs_checkpoint_unlock(fs);
1703         return err;
1704 }
1705
1706 /*--------------------------------------------------------------------
1707  * Finally the read-only operations
1708  */
1709 static int
1710 lafs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1711 {
1712         struct dentry *dentry = filp->f_dentry;
1713         struct lafs_inode *lai = LAFSI(dentry->d_inode);
1714         ino_t ino;
1715         loff_t i = filp->f_pos;
1716         loff_t bnum;
1717         u32 hash;
1718         int err = 0;
1719         int over;
1720         int bits = dentry->d_inode->i_blkbits - 8;
1721         u32 seed = lai->md.file.seed;
1722
1723         switch (i) {
1724         case 0:
1725                 ino = dentry->d_inode->i_ino;
1726                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1727                         break;
1728                 filp->f_pos++;
1729                 i++;
1730                 /* fallthrough */
1731         case 1:
1732                 ino = lai->md.file.parent;
1733                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1734                         break;
1735                 filp->f_pos++;
1736                 i++;
1737                 /* fallthrough */
1738         default:
1739                 hash = i - 2;
1740                 err = 0;
1741                 over = 0;
1742                 do {
1743                         struct datablock *b;
1744                         char *buf;
1745
1746                         bnum = hash+1;
1747                         switch (lafs_find_next(dentry->d_inode, &bnum)) {
1748                         case 1:
1749                                 break;
1750                         case 0:
1751                                 bnum = 0;
1752                                 break;
1753                         default:
1754                                 return -EIO;
1755                         }
1756                         b = lafs_get_block(dentry->d_inode, bnum, NULL,
1757                                            GFP_KERNEL, MKREF(readdir));
1758                         if (!b) {
1759                                 err = -ENOMEM;
1760                                 break;
1761                         }
1762                         err = lafs_read_block(b);
1763                         if (err)
1764                                 break;
1765                         /* buf = map_dblock(b); */
1766                         buf = kmap(b->page);
1767                         buf += dblock_offset(b);
1768                         while (1) {
1769                                 u8 piece;
1770                                 struct dir_ent de;
1771
1772                                 lafs_dir_find(buf, bits, seed, hash, &piece);
1773                                 if (!piece)
1774                                         break;
1775                                 hash = seed;
1776                                 lafs_dir_extract(buf, bits, &de,
1777                                                  piece, &hash);
1778
1779                                 if (de.target == 0) {
1780                                         hash++;
1781                                         filp->f_pos = hash+2;
1782                                         continue;
1783                                 }
1784                                 /* This is a good name to return */
1785                                 over = filldir(dirent, de.name, de.nlen,
1786                                                hash+2, de.target, de.type);
1787                                 hash++;
1788                                 if (!over)
1789                                         filp->f_pos = hash+2;
1790                                 else
1791                                         break;
1792                         }
1793                         /* unmap_dblock(b, buf); */
1794                         kunmap(b->page);
1795                         putdref(b, MKREF(readdir));
1796                         hash = bnum;
1797                 } while (bnum && !over);
1798                 break;
1799         }
1800         return err;
1801 }
1802
1803 static struct dentry *
1804 lafs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
1805 {
1806         /* Simple lookup that maps inode number to that inode. */
1807         u32 inum = 0;
1808         struct inode *ino;
1809         int err = dir_lookup(dir, dentry->d_name.name, dentry->d_name.len,
1810                              &inum);
1811
1812         if (err == -ENOENT) {
1813                 d_add(dentry, NULL);
1814                 return NULL;
1815         }
1816         /* FIXME range check inum */
1817
1818         if (err < 0)
1819                 return ERR_PTR(err);
1820         ino = lafs_iget(LAFSI(dir)->filesys, inum, SYNC);
1821
1822         if (IS_ERR(ino))
1823                 return ERR_PTR(PTR_ERR(ino));
1824         return d_splice_alias(ino, dentry);
1825 }
1826
1827 static int lafs_getattr_dir(struct vfsmount *mnt, struct dentry *dentry,
1828                             struct kstat *stat)
1829 {
1830         lafs_fillattr(dentry->d_inode, stat);
1831         /* hide 'holes' in directories by making the size match
1832          * the number of allocated blocks.
1833          */
1834         if (stat->size > dentry->d_inode->i_sb->s_blocksize)
1835                 stat->size = (dentry->d_inode->i_sb->s_blocksize *
1836                              (LAFSI(dentry->d_inode)->cblocks +
1837                               LAFSI(dentry->d_inode)->pblocks +
1838                               LAFSI(dentry->d_inode)->ablocks));
1839         return 0;
1840 }
1841
1842 const struct file_operations lafs_dir_file_operations = {
1843         .llseek         = generic_file_llseek,  /* Just set 'pos' */
1844         .read           = generic_read_dir,     /* return error */
1845         .readdir        = lafs_readdir,
1846 #if 0
1847         .fsync          = lafs_sync_cluster,
1848 #endif
1849 };
1850
1851 const struct inode_operations lafs_dir_ino_operations = {
1852         .lookup         = lafs_lookup,
1853         .create         = lafs_create,
1854         .link           = lafs_link,
1855         .unlink         = lafs_unlink,
1856         .symlink        = lafs_symlink,
1857         .mkdir          = lafs_mkdir,
1858         .rmdir          = lafs_rmdir,
1859         .rename         = lafs_rename,
1860         .mknod          = lafs_mknod,
1861         .setattr        = lafs_setattr,
1862         .getattr        = lafs_getattr_dir,
1863 };