
/*
 * fs/lafs/super.c
 * Copyright (C) 2005-2009
 * Neil Brown <neilb@suse.de>
 * Released under the GPL, version 2
 */

#include        "lafs.h"
#include        <linux/namei.h>
#include        <linux/crc32.h>
#include        <linux/statfs.h>
#include        <linux/mount.h>
#include        <linux/exportfs.h>
#include        <linux/slab.h>

static struct super_operations lafs_sops;
static const struct export_operations lafs_export_ops;

/*---------------------------------------------------------------------
 * Write out state and super blocks
 *  The super blocks only need to be written when the geometry of the
 *  array changes such as when a device is added, removed, or resized.
 *  So we don't bother with that just yet.
 *  The state block needs to be written - twice on each device - whenever
 *  a checkpoint is completed.  All copies are identical and the writes
 *  proceed in parallel.  There are 4 stateblock locations on each device.
 *  2 are typically less recent than the other two.  We over-write the
 *  less-recent copies.
 *  FIXME on a RAID4 we should pad the write to be a full stripe.
 *
 * Locking issues:  This is called from the checkpoint thread and so
 *  it does not race with anything else exclusive to that thread.
 *  The nonlog information needs to be reviewed once that functionality
 *  is implemented.
 */

int lafs_write_state(struct fs *fs)
{
        struct lafs_state *st;
        int i, d;

        fs->seq++;
        st = fs->state;
        st->seq = cpu_to_le32(fs->seq);
        st->nonlog_segment = cpu_to_le32(fs->nonlog_segment);
        st->nonlog_dev = cpu_to_le16(fs->nonlog_dev);
        st->nonlog_offset = cpu_to_le16(fs->nonlog_offset);
        st->nextyouth = cpu_to_le16(fs->youth_next);
        st->checkpointcluster = cpu_to_le64(fs->checkpointcluster);
        for (i = 0; i < fs->maxsnapshot; i++)
                st->root_inodes[i] = cpu_to_le64(fs->ss[i].root_addr);

        st->checksum = 0;
        st->checksum = crc32_le(0, (unsigned char *)st, fs->statesize);

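        /*
         * The parity of 'seq' selects which pair of the four state-block
         * slots gets overwritten below, so the two most recently written
         * copies are always left intact.
         */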
        for (d = 0; d < fs->devices ; d++)
                for (i = (fs->seq & 1); i < 4 ; i += 2)
                        lafs_super_write(fs, d, fs->devs[d].stateaddr[i] >> 9,
                                         (char *)st, fs->statesize);
        lafs_super_wait(fs);
        /* FIXME what about a write error ??? */
        return 0;
}

static int
valid_devblock(struct lafs_dev *db, sector_t addr)
{
        /* check that this devblock is valid, given that
         * it was found at sector 'addr'
         */
        u32 crc, crc2;
        if (strncmp(db->idtag, "LaFS-DeviceBlock", 16) != 0)
                return 0;
        if (strncmp(db->version, "AlphaDevel      ", 16) != 0)
                return 0;
        /* uuid can be anything */
        crc = db->checksum;
        db->checksum = 0;
        crc2 = crc32_le(0, (unsigned char *)db, LAFS_DEVBLK_SIZE);
        db->checksum = crc;
        if (crc2 != crc) {
                dprintk("%lx != %lx\n", (unsigned long)crc,
                        (unsigned long)crc2);
                return 0;
        }

        addr = addr << 9; /* convert sectors to bytes */
        if (le64_to_cpu(db->devaddr[0]) != addr &&
            le64_to_cpu(db->devaddr[1]) != addr)
                return 0;

        if (db->statebits < 10 || db->statebits > 16)
                return 0;
        if (db->blockbits < 9 || db->blockbits > 20)
                return 0;
        if (db->width < 1 || db->width > 500)
                return 0;
        if (db->stride < 1)
                return 0;
        /* devaddr[0] must be early, [1] must be late */
        if (le64_to_cpu(db->devaddr[0]) >=
            le64_to_cpu(db->segment_offset))
                return 0;

        if (le64_to_cpu(db->devaddr[1]) <
            le64_to_cpu(db->segment_offset) +
            ((((sector_t)le32_to_cpu(db->segment_count)
               * le32_to_cpu(db->segment_size)))
             << le32_to_cpu(db->blockbits)))
                return 0;

        /* we should be fairly flexible about addresses of state blocks,
         * we should probably allow more, and we should just make sure
         * they do not overlap any true segments....
         * FIXME
         */

        /* 2 is an absolute minimum segment size, a few hundred is more
         * likely. We'll put a lower limit of 8, and an upper of 800000
         */
        if (le32_to_cpu(db->segment_size) < 8 ||
            le32_to_cpu(db->segment_size) > 800000)
                return 0;

        if (le32_to_cpu(db->segment_offset) >
            (le32_to_cpu(db->segment_size)<<db->blockbits) * 10)
                return 0;

        /* FIXME should range check segment_count, but need to know
         * size for that */
        if (le32_to_cpu(db->level) > 10)
                return 0;

        /* I guess it looks sane enough... */
        return 1;
}

static int
compare_dev(struct lafs_dev *orig, struct lafs_dev *new)
{
        /* Both these are known to be valid.
         * Return:
         *   0 if they are for the same filesystem, but 'new' is older
         *   1 if they are for the same filesystem, and 'new' is newer
         *  -1 if they are for different filesystems
         */
        if (memcmp(orig->uuid, new->uuid, 16))
                return -1;
        if (u32_after(le32_to_cpu(new->seq),
                      le32_to_cpu(orig->seq)))
                return 1;
        return 0;
}

static int
valid_stateblock(struct lafs_state *st, struct lafs_dev *dv)
{
        /* Given the 'dv' devblock, make sure 'st' is a valid
         * and consistent stateblock
         */
        u32 crc;
        if (strncmp(st->idtag, "LaFS-State-Block", 16) != 0)
                return 0;
        if (strncmp(st->version, "AlphaDevel      ", 16) != 0)
                return 0;
        crc = st->checksum;
        st->checksum = 0;
        if (crc32_le(0, (unsigned char *)st, 1<<dv->statebits) != crc)
                return 0;
        st->checksum = crc;

        if (memcmp(st->uuid, dv->uuid, 16))
                return 0;
        /* FIXME cannot quite be that big! */
        if (le32_to_cpu(st->maxsnapshot) > (1<<(dv->statebits-3)))
                return 0;

        return 1;
}

static int
compare_state(struct lafs_state *orig, struct lafs_state *new)
{
        /* return 1 if 'new' is actually newer than 'orig'.
         * We already know they are both valid and have the same
         * uuid... I don't think there is anything else to be checked
         */
        return u32_after(le32_to_cpu(new->seq), le32_to_cpu(orig->seq));
}

/*
 * Mount options.
 * As we can have multiple devices, things are slightly non-obvious.
 * The 'devname' can be either a device name, starting '/', or
 * a filesystem name (not starting '/').
 * The 'data' is a standard comma-separated list of options.
 * For 'mount' these are:
 *    dev=/dev/X
 *              - devices in addition to 'dev_name'
 *    new=/dev/X
 *              - A new device, with a superblock already present, to be added.
 *    incomplete
 *              - don't complain if not all devices are given
 *    ?? quota stuff, cleaning parameters,
 *
 * For 'remount', options are
 *    dev=  - add another device
 *    new=  - the device is being added.
 *
 */
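/*
 * For example (device names and mount point purely illustrative), a
 * filesystem spread across two devices might be mounted with:
 *     mount -t lafs /dev/sdX1 -o dev=/dev/sdY1 /mnt/lafs
 * and a further device introduced later with:
 *     mount -o remount,new=/dev/sdZ1 /mnt/lafs
 */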

struct options {
        int devcnt;
        int curr_dev;
        int statebits, blockbits;
        struct devent {
                const char *dev;
                int is_new;
                int is_name;
                struct block_device *bdev;
                struct lafs_dev *devblock;
                struct lafs_state *stateblock;
                int devchoice, statechoice;
        } *devlist;
        const char *name;
};

static int
count_devs(const char *name, char *data)
{
        int cnt = 0;
        if (*name == '/')
                cnt = 1;
        while (data && *data) {
                if (strncmp(data, "dev=", 4) == 0)
                        cnt++;
                if (strncmp(data, "new=", 4) == 0)
                        cnt++;
                data = strchr(data, ',');
                if (data)
                        data++;
        }
        return cnt;
}

static int
parse_opts(struct options *op, const char *name, char *data)
{
        int dv = 0;
        char *p;

        memset(op, 0, sizeof(*op));
        op->devcnt = count_devs(name, data);
        op->devlist = kzalloc(op->devcnt*sizeof(op->devlist[0]), GFP_KERNEL);

        if (!op->devlist)
                return -ENOMEM;

        op->name = NULL;
        if (*name == '/') {
                op->devlist[dv].is_name = 1;
                op->devlist[dv++].dev = name;
        } else
                op->name = name;
        while ((p = strsep(&data, ",")) != NULL) {
                if (!*p)
                        continue;
                if (strncmp(p, "dev=", 4) == 0)
                        op->devlist[dv++].dev = p+4;
                else if (strncmp(p, "new=", 4) == 0) {
                        op->devlist[dv].is_new = 1;
                        op->devlist[dv++].dev = p+4;
                } else {
                        printk(KERN_ERR
                               "LaFS: Unrecognised mount option \"%s\"\n", p);
                        return -EINVAL;
                }
        }
        op->devcnt = dv;

        return 0;
}

static int
lafs_load_super(struct block_device *bdev, void *opv, int silent)
{
        /* Find the devblock and the stateblock for this device.
         *
         * Only do basic internal consistency checks.  Inter-device
         * checks happen later.
         */
        struct options *op = opv;
        struct devent *dv;
        struct page *pg;
        sector_t sect, dev_addr = 0, state_addr = 0;
        int err = 0;
        unsigned int n;
        int i;
        int have_dev = 0, have_state = 0;

        dv = &op->devlist[op->curr_dev];
        BUG_ON(dv->devblock);
        BUG_ON(dv->stateblock);

        n = queue_logical_block_size(bdev->bd_disk->queue);
        if (n < LAFS_DEVBLK_SIZE)
                n = LAFS_DEVBLK_SIZE;
        BUG_ON(n > PAGE_SIZE);
        dv->devblock = kmalloc(n, GFP_KERNEL);
        if (!dv->devblock)
                return -ENOMEM;
        pg = alloc_page(GFP_KERNEL);
        if (!pg)
                return -ENOMEM;

        /* Now find a devblock, check the first two possible locations,
         * and the last two.  If two devblocks are found with different
         * uuids, we are confused!
         */
        sect = 0;
        for (i = 0; i < 4; i++) {
                /* try to read block at 'sect' */
                int ok = lafs_sync_page_io(bdev, sect, 0, n, pg, READ);

                if (ok && valid_devblock(page_address(pg), sect)) {
                        if (!have_dev) {
                                have_dev = 1;
                                memcpy(dv->devblock, page_address(pg), n);
                                dev_addr = sect;
                        } else switch (compare_dev(dv->devblock,
                                                   page_address(pg))) {
                                case 0: /* older, do nothing */
                                        break;
                                case 1: /* newer, overwrite */
                                        memcpy(dv->devblock, page_address(pg), n);
                                        dev_addr = sect;
                                        break;
                                default: /* inconsistent --- HELP */
                                        printk(KERN_ERR "LaFS: inconsistent device-blocks found.\n");
                                        err = -EINVAL;
                                        goto out;
                                }
                }

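                /*
                 * The first two copies live at the start of the device;
                 * after reading them, jump to the two copies stored at the
                 * (devblock-aligned) end of the device.
                 */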
                if (i != 1)
                        sect += (n>>9);
                else {
                        sect = bdev->bd_inode->i_size & ~(sector_t)(n-1);
                        sect >>= 9;
                        sect -= (n>>9)*2;
                }
        }
        /* FIXME - we've lost the read error, if it was significant */
        err = -EINVAL;
        if (!have_dev) {
                if (!silent)
                        printk(KERN_ERR "LaFS - no valid devblock found.\n");
                goto out;
        }

        /* OK, we have a valid devblock, that's nice.
         * Now we should be able to find some stateblocks.
         * The locations are in the devblock.
         */
        n = 1 << dv->devblock->statebits;
        if ((n & (n-1)) ||
            n < queue_logical_block_size(bdev->bd_disk->queue) ||
            n > 128*1024) {
                printk(KERN_ERR "LaFS: statesize of %u not acceptable.\n", n);
                err = -EINVAL;
                goto out;
        }
        dv->stateblock = kmalloc(n, GFP_KERNEL);
        err = -ENOMEM;
        if (!dv->stateblock)
                goto out;
        for (i = 0; i < 4; i++) {
                int ok;
                sect = le64_to_cpu(dv->devblock->stateaddr[i])>>9;
                ok = lafs_sync_page_io(bdev, sect, 0, n, pg, READ);
                if (ok && valid_stateblock(page_address(pg), dv->devblock)) {
                        if (!have_state) {
                                have_state = 1;
                                memcpy(dv->stateblock, page_address(pg), n);
                                state_addr = i;
                        } else if (compare_state(dv->stateblock,
                                                 page_address(pg))) {
                                memcpy(dv->stateblock, page_address(pg), n);
                                state_addr = i;
                        }
                }
        }

        if (have_state) {
                err = 0;
                dv->devchoice = dev_addr;
                dv->statechoice = state_addr;
        } else {
                err = -EINVAL;
                if (!silent)
                        printk(KERN_ERR "LaFS: no valid stateblock found.\n");
        }
out:
        page_cache_release(pg);
        return err;
}

static int
check_devs(struct options *op)
{
        /* Check that we have enough devices, that they are all for the
         * same uuid, and that they don't overlap.
         * Also check that the 'seq' numbers of the devblocks are
         * within '1' of each other.
         */
        int seqlo = le32_to_cpu(op->devlist[0].devblock->seq);
        int seqhi = le32_to_cpu(op->devlist[0].devblock->seq);
        int newdev = 0;
        int newstate = 0;
        int i, j;

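        /*
         * Devblock seq numbers may legitimately differ by one - presumably
         * a device was added or updated and the new devblocks did not reach
         * every device - but anything further apart means the set is
         * inconsistent and is rejected.
         */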
        for (i = 1; i < op->devcnt; i++) {
                if (memcmp(op->devlist[0].stateblock->uuid,
                           op->devlist[i].stateblock->uuid,
                           16) != 0)
                        return -EINVAL;

                if (le32_to_cpu(op->devlist[i].devblock->seq) == seqlo)
                        ;
                else if (le32_to_cpu(op->devlist[i].devblock->seq) == seqlo+1) {
                        newdev = i;
                        seqhi = seqlo+1;
                } else if (le32_to_cpu(op->devlist[i].devblock->seq) == seqhi-1)
                        seqlo = seqhi-1;
                else
                        return -EINVAL;

                if (u32_after(le32_to_cpu(op->devlist[i].stateblock->seq),
                              le32_to_cpu(op->devlist[newstate].
                                          stateblock->seq)))
                        newstate = i;
        }
        if (le32_to_cpu(op->devlist[newstate].stateblock->devices)
            != op->devcnt)
                return -EINVAL;

        op->statebits = op->devlist[0].devblock->statebits;
        op->blockbits = op->devlist[0].devblock->blockbits;

        /* Now check devices don't overlap in start/size.
         * We do a simple quadratic search.
         */
        for (i = 0; i < op->devcnt; i++)
                for (j = 0; j < op->devcnt; j++)
                        if (i != j)
                                if (le64_to_cpu(op->devlist[i].devblock->start) <
                                    le64_to_cpu(op->devlist[j].devblock->start) &&

                                    le64_to_cpu(op->devlist[i].devblock->start)+
                                    le64_to_cpu(op->devlist[i].devblock->size) >
                                    le64_to_cpu(op->devlist[j].devblock->start))
                                        return -EINVAL;
        return newstate;
}

/* We identify lafs superblocks by the filesystem uuid.  This means
 * that block-level snapshots cannot be mounted.  You should use
 * fs-level snapshots instead.
 */
static int sb_test(struct super_block *sb, void *data)
{
        struct sb_key *ptn = data;
        struct sb_key *sk = sb->s_fs_info;
        return memcmp(ptn->fs->state->uuid,
                      sk->fs->state->uuid, 16) == 0;
}

static int sb_set(struct super_block *sb, void *data)
{
        struct sb_key *ptn = data;
        sb->s_fs_info = ptn;
        return set_anon_super(sb, NULL);
}

static int
lafs_load(struct fs *fs, struct options *op, int newest)
{
        /* We seem to have a full set of devices for the filesystem.
         * Time to create our fs_info structure and fill it out.
         * This only includes information from the dev and state blocks.
         * Finding the root-inode comes a bit later.
         */
        struct lafs_state *st;
        int i;
        int err;
        struct sb_key *k;

        st = fs->state = op->devlist[newest].stateblock;
        op->devlist[newest].stateblock = NULL;
#ifdef DUMP
        dfs = fs;
#endif

        fs->seq = le32_to_cpu(st->seq);
        fs->levels = le32_to_cpu(st->levels);
        fs->devices = op->devcnt;
        fs->devs_loaded = fs->devices; /* FIXME use this or lose this */
        fs->statesize = 1 << op->statebits;
        fs->blocksize = 1 << op->blockbits;
        fs->blocksize_bits = op->blockbits;

        fs->nonlog_segment = le32_to_cpu(st->nonlog_segment);
        fs->nonlog_dev = le16_to_cpu(st->nonlog_dev);
        fs->nonlog_offset = le16_to_cpu(st->nonlog_offset);
        fs->youth_next = le16_to_cpu(st->nextyouth);
        fs->checkpoint_youth = fs->youth_next;
        if (fs->youth_next < 8)
                fs->youth_next = 8;
        fs->scan.first_free_pass = 1;
        fs->scan.free_dev = -1;

        fs->maxsnapshot = le32_to_cpu(st->maxsnapshot);

        fs->scan.free_usages = kmalloc(PAGE_SIZE, GFP_KERNEL);
        err = lafs_segtrack_init(fs->segtrack);

        fs->ss = kzalloc(sizeof(struct snapshot)*fs->maxsnapshot, GFP_KERNEL);
        if (!fs->ss || !fs->scan.free_usages || err) {
                if (!err)
                        err = -ENOMEM;
                goto abort;
        }

        fs->checkpointcluster = le64_to_cpu(st->checkpointcluster);
        for (i = 0; i < fs->maxsnapshot; i++) {
                fs->ss[i].root_addr =
                        le64_to_cpu(st->root_inodes[i]);
                dprintk("root inode %d is at %llu\n",
                        i, fs->ss[i].root_addr);
        }
        INIT_LIST_HEAD(&fs->pending_orphans);
        INIT_LIST_HEAD(&fs->inode_index);
        INIT_LIST_HEAD(&fs->phase_leafs[0]);
        INIT_LIST_HEAD(&fs->phase_leafs[1]);
        INIT_LIST_HEAD(&fs->clean_leafs);
        INIT_LIST_HEAD(&fs->account_leafs);
        atomic_set(&fs->sb_writes_pending, 0);
        init_waitqueue_head(&fs->sb_writes_wait);
        init_waitqueue_head(&fs->async_complete);
        init_waitqueue_head(&fs->trunc_wait);
        mutex_init(&fs->cleaner.lock);
        spin_lock_init(&fs->stable_lock);
        spin_lock_init(&fs->alloc_lock);
        spin_lock_init(&fs->lock);
        init_waitqueue_head(&fs->phase_wait);

        INIT_WORK(&fs->done_work, lafs_done_work);

        /* FIXME add congestion and unplug functions to this bdi */
        err = bdi_init(&fs->bdi);
        if (err)
                goto abort;

        fs->phase_locked = 0;
        for (i = 0; i < WC_NUM; i++) {
                int j;
                mutex_init(&fs->wc[i].lock);
                for (j = 0; j < 4 ; j++) {
                        atomic_set(&fs->wc[i].pending_cnt[j], 0);
                        INIT_LIST_HEAD(&fs->wc[i].pending_blocks[j]);
                }
                init_waitqueue_head(&fs->wc[i].pending_wait);
                fs->wc[i].seg.dev = -1;
        }

        fs->max_newsegs = 32; /* FIXME this should be configurable */

        err = -ENOMEM;
        fs->devs = kzalloc(sizeof(struct fs_dev)*fs->devices, GFP_KERNEL);
        if (!fs->devs)
                goto abort;

        k = kzalloc(sizeof(*k), GFP_KERNEL);
        if (!k)
                goto abort;
        k->fs = fs;
        fs->prime_sb = sget(&lafs_fs_type, sb_test, sb_set, k);
        if (IS_ERR(fs->prime_sb)) {
                kfree(k);
                err = PTR_ERR(fs->prime_sb);
                goto abort;
        }
        if (fs->prime_sb->s_root) {
                /* filesystem with this uuid already exists */
                deactivate_locked_super(fs->prime_sb);
                kfree(k);
                fs->prime_sb = NULL;
                err = -EBUSY;
                goto abort;
        }
        err = bdi_register_dev(&fs->bdi, fs->prime_sb->s_dev);
        if (err) {
                deactivate_locked_super(fs->prime_sb);
                kfree(k);
                fs->prime_sb = NULL;
                goto abort;
        }
        fs->prime_sb->s_bdi = &fs->bdi;

        fs->prime_sb->s_blocksize = 1 << op->blockbits;
        fs->prime_sb->s_blocksize_bits = op->blockbits;
        fs->prime_sb->s_op = &lafs_sops;
        fs->prime_sb->s_export_op = &lafs_export_ops;
        fs->prime_sb->s_root = NULL;

        /* We allow 29 bits for nanosecs, so they must be even. */
        fs->prime_sb->s_time_gran = 2;

        for (i = 0; i < fs->devices; i++) {
                struct fs_dev *dv = &fs->devs[i];
                struct devent *de = &op->devlist[i];
                int j;
                dv->bdev = de->bdev;
                de->bdev = NULL;

                dv->devblk = de->devblock;
                de->devblock = NULL;

                dv->recent_dev = de->devchoice;
                dv->recent_state = de->statechoice;

                dv->start = le64_to_cpu(dv->devblk->start);
                dv->size = le64_to_cpu(dv->devblk->size);
                dprintk("Dev %d seems to range %llu + %llu\n",
                        i, (unsigned long long)dv->start,
                        (unsigned long long)dv->size);

                dv->width = le16_to_cpu(dv->devblk->width);
                dv->stride = le32_to_cpu(dv->devblk->stride);
                dv->segment_size = le32_to_cpu(dv->devblk->segment_size);
                dv->segment_offset = le32_to_cpu(dv->devblk->segment_offset);
                dv->segment_count = le32_to_cpu(dv->devblk->segment_count);
                dv->usage_inum = le32_to_cpu(dv->devblk->usage_inum);
                dv->level = le16_to_cpu(dv->devblk->level);

                if (dv->segment_size > fs->max_segment)
                        fs->max_segment = dv->segment_size;

                if (dv->width * dv->stride <= dv->segment_size) {
                        dv->tables_per_seg = dv->segment_size /
                                dv->width / dv->stride;
                        dv->rows_per_table = dv->stride;
                        dv->segment_stride = dv->segment_size;
                } else {
                        dv->tables_per_seg = 1;
                        dv->rows_per_table = dv->segment_size / dv->width;
                        dv->segment_stride = dv->rows_per_table;
                }
                /* table size is the number of blocks in the segment usage
                 * file per snapshot
                 */
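                /*
                 * (The shift by blocksize_bits-1 divides by blocksize/2,
                 * which suggests two bytes of usage information are kept
                 * per segment; the extra terms round the result up.)
                 */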
                dv->tablesize = (dv->segment_count + (1<<(fs->blocksize_bits-1)) + 1)
                        >> (fs->blocksize_bits-1);

                for (j = 0; j < 2; j++)
                        dv->devaddr[j] = le64_to_cpu(dv->devblk->devaddr[j]);
                for (j = 0; j < 4; j++)
                        dv->stateaddr[j] = le64_to_cpu(dv->devblk->stateaddr[j]);
        }
        return 0;

abort:
        bdi_destroy(&fs->bdi);
        kfree(fs->scan.free_usages);
        lafs_segtrack_free(fs->segtrack);
        kfree(fs->devs);
        kfree(fs->ss);
        kfree(fs);
        return err;
}

static int show_orphans(struct fs *fs)
{
        struct datablock *db;
        printk("Orphans:\n");
        list_for_each_entry(db, &fs->pending_orphans,
                            orphans) {
                struct inode *ino = iget_my_inode(db);
                printk("orphan=%s\n", strblk(&db->b));
                if (ino)
                        lafs_print_tree(&LAFSI(ino)->iblock->b, 0);
                iput(ino);
        }
        printk("cleaner active: %d %d\n", fs->cleaner.active,
               fs->scan.done);
        return 1; /* meaningless, but makes it easy to add to wait_event below */
}

static void lafs_kill_sb(struct super_block *sb)
{
        struct fs *fs = fs_from_sb(sb);
        /* Release the 'struct fs' */
        int i;

        /* FIXME should I refcount this when there are multiple
         * filesets? How does that work?
         */

        /* Delay final destruction of the root inode */
        /* FIXME all the sbs... */
        set_bit(I_Deleting, &LAFSI(fs->ss[0].root)->iflags);

        /* FIXME I'm not sure we should be waiting for the
         * cleaner.  Maybe we should just release all tc->cleaning
         * blocks instead.
         */
        set_bit(CleanerDisabled, &fs->fsstate);

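        /*
         * show_orphans() always returns 1, so including it in the condition
         * simply dumps the current orphan/cleaner state each time the
         * condition is re-evaluated.
         */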
        wait_event(fs->async_complete,
                   show_orphans(fs) &&
                   !test_bit(OrphansRunning, &fs->fsstate) &&
                   list_empty(&fs->pending_orphans) &&
                   fs->scan.done == 1 &&
                   fs->cleaner.active == 0);

        kill_anon_super(fs->prime_sb);

        bdi_destroy(&fs->bdi);

        for (i = 0; i < fs->devices; i++) {
                struct fs_dev *dv = &fs->devs[i];
                kfree(dv->devblk);
                close_bdev_exclusive(dv->bdev, FMODE_READ|FMODE_WRITE);
        }

        /* Final checkpoint will have cleared out the leafs lists,
         * so they should all be empty.
         */
        /* Let's see what is on the 'leaf' list. */
        for (i = 0; i < 2; i++) {
                struct block *b;
                dprintk("For phase %d\n", i);
        retry:
                list_for_each_entry(b, &fs->phase_leafs[i], lru) {
                        /* FIXME this is only OK for readonly mounts.
                         */
                        getref(b, MKREF(release));
                        lafs_refile(b, 0);
                        if (test_bit(B_Pinned, &b->flags)) {
                                /* didn't fix the pincnt !! */
                                printk("This was pinned: %s\n", strblk(b));
                                lafs_print_tree(b, 1);
                                BUG();
                        }
                        putref(b, MKREF(release));
                        goto retry;
                }
        }
        BUG_ON(!list_empty(&fs->clean_leafs));

        flush_scheduled_work();
        lafs_stop_thread(fs);

        kfree(fs->state);
        kfree(fs->ss);
        kfree(fs->devs);
        lafs_segtrack_free(fs->segtrack);
        kfree(fs->scan.free_usages);
        kfree(fs->prime_sb->s_fs_info);
        kfree(fs);
}

static void
lafs_put_super(struct super_block *sb)
{
        struct fs *fs = fs_from_sb(sb);
        int ss;
        struct lafs_inode *li;

        lafs_checkpoint_lock(fs);
        lafs_checkpoint_start(fs);
        if (sb == fs->prime_sb)
                /* Don't incorporate any more segusage/quota updates. */
                set_bit(FinalCheckpoint, &fs->fsstate);
        lafs_checkpoint_unlock_wait(fs);
        lafs_cluster_wait_all(fs);

        if (sb == fs->prime_sb) {
                int d;
                /* This is the main sb, not a snapshot or
                 * subordinate fs.
                 * Now that all inodes have been invalidated we can do
                 * the final checkpoint.
                 */
                lafs_close_all_segments(fs);
                lafs_empty_segment_table(fs);
                lafs_seg_put_all(fs);

                iput(fs->orphans);
                fs->orphans = NULL;
                for (d = 0; d < fs->devices; d++)
                        if (fs->devs[d].segsum) {
                                iput(fs->devs[d].segsum);
                                fs->devs[d].segsum = NULL;
                        }
        }

        /* need to break a circular reference... */
        for (ss = 0; ss < fs->maxsnapshot; ss++)
                if (fs->ss[ss].root &&
                    fs->ss[ss].root->i_sb == sb) {
                        dprintk("Putting ss %d\n", ss);
                        li = LAFSI(fs->ss[ss].root);
                        if (test_bit(B_Realloc, &li->dblock->b.flags))
                                lafs_dump_tree();
                        iput(fs->ss[ss].root);
                        fs->ss[ss].root = NULL;
                        break;
                }
}

static int
lafs_get_devs(struct fs *fs, struct options *op, int flags)
{
        int err;
        int i;

        for (i = 0; i < op->devcnt; i++) {
                struct block_device *bdev;
                op->curr_dev = i;

                bdev = open_bdev_exclusive(op->devlist[i].dev,
                                           FMODE_READ|FMODE_WRITE, fs);
                err = PTR_ERR(bdev);
                if (IS_ERR(bdev))
                        goto out;
                err = lafs_load_super(bdev, op, flags & MS_SILENT ? 1 : 0);
                if (err < 0)
                        goto out;
                op->devlist[i].bdev = bdev;
        }
        return 0;

out:
        return err;
}

static int
lafs_get_sb(struct file_system_type *fs_type,
            int flags, const char *dev_name, void *data,
            struct vfsmount *mnt)
{
        /* As we may have multiple devices, some named in 'data', we cannot
         * just use get_sb_bdev; we need to roll our own.
         * We open each of the devices and make sure the superblocks found
         * are either all new, or all for the same filesystem.
         * If the latter, we return the primary.
         * If the former, we init the filesystem, copying static data
         * to all supers.
         * First we 'open_bdev_exclusive' each device, exclusive to lafs.
         * Then we 'sget' a superblock that knows any/all the devices.
         * This may be pre-existing, or may be new.
         * If new, it will be created knowing all devices.
         * If pre-existing and it doesn't have the correct device list, error.
         */
        struct options op;
        int err;
        int newest;
        struct fs *fs = kzalloc(sizeof(*fs), GFP_KERNEL);
        char *cdata = data;
        if (cdata == NULL)
                cdata = "";

        if (!fs)
                return -ENOMEM;
        err = parse_opts(&op, dev_name, cdata);
        if (err)
                goto out;

        /* We now have a list of device names.  We call open_bdev_exclusive
         * on each to collect some superblocks.
         */
        err = lafs_get_devs(fs, &op, flags);
        if (err)
                goto out;

        /* Each device has a valid dev and state block.  Hopefully they
         * are all for the same filesystem.  If they don't have the
         * same uuid, we will bail out here.  We also check that we have
         * enough, and that they don't overlap.
         * While we are looking at state blocks, pick the newest.
         */
        newest = check_devs(&op);
        if (newest < 0) {
                err = newest;
                goto out;
        }

        /* So they seem to be the same - better create our
         * 'fs' structure and fill it in.
         */
        err = lafs_load(fs, &op, newest);
        if (err)
                goto out;

        /* Well, all the devices check out.  Now we need to find the
         * filesystem */
        err = lafs_mount(fs);
        if (err == 0)
                err = lafs_start_thread(fs);
        if (err)
                deactivate_locked_super(fs->prime_sb);
        else {
                fs->prime_sb->s_flags |= MS_ACTIVE;
                simple_set_mnt(mnt, fs->prime_sb);
        }
        /* And there you have it.  Filesystem all mounted, root dir found,
         * metadata files initialised, all pigs fed, and ready to fly!!!
         */

out:
        /* Now we clean up 'options'.  Anything that is wanted has
         * been moved into 'fs', so we just discard anything we find.
         */
        if (op.devlist) {
                int i;
                for (i = 0; i < op.devcnt; i++) {
                        kfree(op.devlist[i].devblock);
                        kfree(op.devlist[i].stateblock);
                        if (op.devlist[i].bdev)
                                close_bdev_exclusive(op.devlist[i].bdev,
                                                     FMODE_READ|FMODE_WRITE);
                }
                kfree(op.devlist);
        }
        return err;
}

static int test_subset(struct super_block *sb, void *data)
{
        struct sb_key *ptn = data;
        struct sb_key *k = sb->s_fs_info;

        return ptn->fs == k->fs && ptn->root == k->root;
}

static int set_subset(struct super_block *sb, void *data)
{
        sb->s_fs_info = data;
        set_anon_super(sb, NULL);
        return 0;
}

static struct file_system_type lafs_subset_fs_type;
struct super_block *lafs_get_subset_sb(struct inode *ino)
{
        /* ino must be a TypeInodeFile inode in the prime filesystem. */
        struct fs *fs = fs_from_inode(ino);
        struct super_block *sb;
        struct sb_key *k = kmalloc(sizeof(*k), GFP_KERNEL);

        if (!k)
                return ERR_PTR(-ENOMEM);

        k->fs = fs;
        k->root = ino;
        sb = sget(&lafs_subset_fs_type, test_subset, set_subset, k);
        if (IS_ERR(sb)) {
                kfree(k);
        } else if (sb->s_root) {
                /* already allocated */
                kfree(k);
        } else {
                struct inode *rootdir, *imapfile;
                int err = 0;

                igrab(ino);
                sb->s_blocksize = fs->blocksize;
                sb->s_blocksize_bits = fs->blocksize_bits;
                sb->s_bdi = fs->prime_sb->s_bdi;
                sb->s_op = &lafs_sops;
                sb->s_export_op = &lafs_export_ops;
                sb->s_time_gran = 2;
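                /*
                 * Inode 2 is the subset's root directory and inode 1 its
                 * inode map.  If either is missing this fileset has never
                 * been used before, so create it now.
                 */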
                rootdir = lafs_iget(sb, 2, SYNC);
                if (IS_ERR(rootdir) && PTR_ERR(rootdir) == -ENOENT) {
                        rootdir = lafs_new_inode(fs, sb, NULL,
                                                 TypeDir, 2, 0755, NULL);
                        /* FIXME could the inode get written before we set
                         * the link count ?? */
                        rootdir->i_nlink = 2;
                }
                if (IS_ERR(rootdir))
                        err = PTR_ERR(rootdir);
                else {
                        sb->s_root = d_alloc_root(rootdir);
                        imapfile = lafs_iget(sb, 1, SYNC);
                        if (IS_ERR(imapfile) && PTR_ERR(imapfile) == -ENOENT)
                                imapfile = lafs_new_inode(fs, sb, NULL,
                                                          TypeInodeMap, 1, 0, NULL);

                        if (IS_ERR(imapfile))
                                err = PTR_ERR(imapfile);
                        else
                                iput(imapfile);
                }

                if (!err) {
                        sb->s_op = fs->prime_sb->s_op;
                        sb->s_flags |= MS_ACTIVE;
                        atomic_inc(&fs->prime_sb->s_active);
                        igrab(ino);
                } else {
                        deactivate_locked_super(sb);
                        sb = ERR_PTR(err);
                }
        }
        return sb;
}

static int
lafs_get_subset(struct file_system_type *fs_type,
                int flags, const char *dev_name, void *data,
                struct vfsmount *mnt)
{
        /* Mount, possibly creating, a sub-fileset.
         * dev_name must be an absolute path that leads
         * to an object in a lafs file-system (or snapshot).
         * The object must be either an InodeFile or
         * an empty directory in the main file-system
         * with mode 0 (though that rule might change).
         * In the latter case we change the object to an
         * InodeFile.
         * FIXME must require readonly for snapshots, and readwrite
         * to create.
         */
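        /*
         * For example (paths purely illustrative), an empty mode-0
         * directory /mnt/lafs/home in a mounted lafs filesystem could be
         * attached as a sub-fileset with:
         *     mount -t lafs_subset /mnt/lafs/home /home
         */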

        struct nameidata nd;
        int err;
        struct super_block *sb;
        struct inode *ino;
        struct fs *fs;

        err = path_lookup(dev_name, LOOKUP_FOLLOW, &nd);
        if (err)
                goto out_noput;
        sb = nd.path.dentry->d_sb;
        err = -EINVAL;
        if (sb->s_type != &lafs_fs_type &&
            sb->s_type != &lafs_snap_fs_type)
                goto out;
        ino = nd.path.dentry->d_inode;
        if (LAFSI(ino)->type != TypeInodeFile &&
            LAFSI(ino)->type != TypeDir)
                goto out;
        fs = fs_from_sb(sb);
        mutex_lock(&ino->i_mutex);
        if (LAFSI(ino)->type == TypeDir) {
                struct datablock *inodb;
                /* maybe convert this to TypeInodeFile */
                if (sb->s_type != &lafs_fs_type)
                        goto out_unlock;
                if (ino->i_size)
                        /* FIXME maybe I should run orphans */
                        goto out_unlock;
                if ((ino->i_mode & 07777) != 0)
                        goto out_unlock;
                inodb = lafs_inode_dblock(ino, SYNC, MKREF(make_subset));
                err = PTR_ERR(inodb);
                if (IS_ERR(inodb))
                        goto out_unlock;
                lafs_iolock_block(&inodb->b);
                set_bit(B_PinPending, &inodb->b.flags);
                lafs_iounlock_block(&inodb->b);
                lafs_checkpoint_lock(fs);
                err = lafs_pin_dblock(inodb, ReleaseSpace);
                if (!err) {
                        struct fs_md *md;
                        /* OK, we are good to go making this filesystem */
                        LAFSI(ino)->type = TypeInodeFile;
                        LAFSI(ino)->metadata_size = (sizeof(struct la_inode) +
                                                     sizeof(struct fs_metadata));
                        ino->i_op = &lafs_subset_ino_operations;
                        ino->i_fop = &lafs_subset_file_operations;
                        /* FIXME we lose md->parent here - what to do?? */
                        md = &LAFSI(ino)->md.fs;
                        md->usagetable = 0;
                        ino->i_mtime = current_fs_time(sb);
                        md->cblocks_used = 0;
                        md->pblocks_used = 0;
                        md->ablocks_used = 0;
                        md->blocks_allowed = 10000; /* FIXME */
                        md->blocks_unalloc = 0;
                        /* FIXME should I be using inode_init here */
                        md->creation_age = fs->wc[0].cluster_seq;
                        md->inodes_used = 0;
                        md->quota_inums[0] = 0;
                        md->quota_inums[1] = 0;
                        md->quota_inums[2] = 0;
                        md->quota_inodes[0] = NULL;
                        md->quota_inodes[1] = NULL;
                        md->quota_inodes[2] = NULL;
                        md->name = NULL;
                        lafs_dirty_dblock(inodb);
                        lafs_dirty_inode(ino);
                        /* We use a checkpoint to commit this change;
                         * it is too unusual to bother logging.
                         */
                        lafs_checkpoint_start(fs);
                        lafs_checkpoint_unlock_wait(fs);
                } else {
                        lafs_checkpoint_unlock(fs);
                }
                putdref(inodb, MKREF(make_subset));
                if (err)
                        goto out_unlock;
        }
        err = 0;
        /* We have a TypeInodeFile so we can make a superblock */
        sb = lafs_get_subset_sb(ino);
        iput(ino);

        if (IS_ERR(sb))
                err = PTR_ERR(sb);
        else
                simple_set_mnt(mnt, sb);
out_unlock:
        mutex_unlock(&ino->i_mutex);
out:
        path_put(&nd.path);
out_noput:
        return err;
}

static void lafs_kill_subset(struct super_block *sb)
{
        struct sb_key *k = sb->s_fs_info;
        kill_anon_super(sb);
        iput(k->root);
        deactivate_super(k->fs->prime_sb);
        kfree(k);
}

const struct file_operations lafs_subset_file_operations = {
};

const struct inode_operations lafs_subset_ino_operations = {
};

struct file_system_type lafs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "lafs",
        .get_sb         = lafs_get_sb,
        .kill_sb        = lafs_kill_sb,
        .fs_flags       = FS_REQUIRES_DEV,
};

static struct file_system_type lafs_subset_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "lafs_subset",
        .get_sb         = lafs_get_subset,
        .kill_sb        = lafs_kill_subset,
};

static int __init lafs_init(void)
{
        int err;

        BUILD_BUG_ON(B_NUM_FLAGS > 32);

        err = lafs_ihash_init();
        err = err ?: register_filesystem(&lafs_fs_type);
        err = err ?: register_filesystem(&lafs_snap_fs_type);
        err = err ?: register_filesystem(&lafs_subset_fs_type);
        if (err)
                goto out;
        return 0;

out:
        unregister_filesystem(&lafs_fs_type);
        unregister_filesystem(&lafs_snap_fs_type);
        unregister_filesystem(&lafs_subset_fs_type);
        lafs_ihash_free();
        return err;
}

static void __exit lafs_exit(void)
{
        unregister_filesystem(&lafs_fs_type);
        unregister_filesystem(&lafs_snap_fs_type);
        unregister_filesystem(&lafs_subset_fs_type);
        lafs_ihash_free();
}

static struct inode *lafs_nfs_get_inode(struct super_block *sb,
                                        u64 ino, u32 generation)
{
        struct inode *inode;

        inode = lafs_iget(sb, ino, SYNC);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
        if (generation && inode->i_generation != generation) {
                iput(inode);
                return ERR_PTR(-ESTALE);
        }

        return inode;
}

static struct dentry *lafs_fh_to_dentry(struct super_block *sb, struct fid *fid,
                                        int fh_len, int fh_type)
{
        return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
                                    lafs_nfs_get_inode);
}

static struct dentry *lafs_fh_to_parent(struct super_block *sb, struct fid *fid,
                                        int fh_len, int fh_type)
{
        return generic_fh_to_parent(sb, fid, fh_len, fh_type,
                                    lafs_nfs_get_inode);
}

static struct dentry *lafs_get_parent(struct dentry *child)
{
        ino_t inum = LAFSI(child->d_inode)->md.file.parent;
        struct inode *inode = lafs_iget(child->d_inode->i_sb, inum, SYNC);
        if (IS_ERR(inode))
                return ERR_CAST(inode);
        return d_obtain_alias(inode);
}

static const struct export_operations lafs_export_ops = {
        .fh_to_dentry = lafs_fh_to_dentry,
        .fh_to_parent = lafs_fh_to_parent,
        .get_parent = lafs_get_parent,
};

static struct inode *lafs_alloc_inode(struct super_block *sb)
{
        struct lafs_inode *li;
        li = kmalloc(sizeof(*li), GFP_NOFS);
        if (!li)
                return NULL;
        inode_init_once(&li->vfs_inode);
        li->vfs_inode.i_data.backing_dev_info = sb->s_bdi;
        li->iblock = NULL;
        li->dblock = NULL;
        li->update_cluster = 0;

        init_rwsem(&li->ind_sem);
        INIT_LIST_HEAD(&li->free_index);

        return &li->vfs_inode;
}

static void kfree_inode(struct rcu_head *head)
{
        struct lafs_inode *lai = container_of(head, struct lafs_inode,
                                              md.rcu);
        if (lai->type == TypeInodeFile)
                kfree(lai->md.fs.name);
        kfree(lai);
}

void lafs_destroy_inode(struct inode *inode)
{
        struct datablock *db;

        BUG_ON(!list_empty(&inode->i_sb_list));
        /* Cannot test i_list as dispose_list just does list_del */
        db = lafs_inode_get_dblock(inode, MKREF(destroy));

        if (db) {
                set_bit(I_Destroyed, &LAFSI(inode)->iflags);
                putdref(db, MKREF(destroy));
        } else {
                spin_lock(&inode->i_data.private_lock);
                if (LAFSI(inode)->iblock)
                        LAFS_BUG(atomic_read(&LAFSI(inode)->iblock->b.refcnt),
                                 &LAFSI(inode)->iblock->b);
                /* FIXME could there be Async blocks keeping a refcount?
                 * We should free them.
                 */
                spin_unlock(&inode->i_data.private_lock);
                lafs_release_index(&LAFSI(inode)->free_index);
                call_rcu(&LAFSI(inode)->md.rcu,
                         kfree_inode);
        }
}

static int lafs_sync_fs(struct super_block *sb, int wait)
{
        if (!wait)
                /* We only reach here if s_dirt was set, so it
                 * is reasonable to force a checkpoint.
                 */
                lafs_checkpoint_start(fs_from_sb(sb));
        else
                printk("FIXME I should wait for the checkpoint to finish\n");
        return 0;
}

static int lafs_statfs(struct dentry *de, struct kstatfs *buf)
{
        int i;
        u32 fsid;
        u32 *fsuuid;
        struct fs *fs = fs_from_inode(de->d_inode);
        struct lafs_inode *root = LAFSI(fs->ss[0].root);

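        /* Fold the 128-bit filesystem uuid down to a 32-bit fsid by
         * xor-ing its four 32-bit words together.
         */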
        fsid = 0;
        fsuuid = (u32 *)fs->state->uuid;
        for (i = 0; i < 16 / 4 ; i++)
                fsid ^= le32_to_cpu(fsuuid[i]);

        spin_lock(&root->vfs_inode.i_lock);
        buf->f_type = 0x4C614654; /* "LaFS" */
        buf->f_bsize = fs->blocksize;
        buf->f_blocks = root->md.fs.blocks_allowed;
        if (buf->f_blocks == 0) {
                /* should subtract usage of all other filesystems... */
                for (i = 0; i < fs->devs_loaded; i++)
                        buf->f_blocks += fs->devs[i].size;
        }
        /* "bavail" is "blocks we could succeed in adding to the filesystem".
         * "bfree" is effectively total blocks - used blocks.
         */
        buf->f_bavail = fs->free_blocks + fs->clean_reserved - fs->allocated_blocks;
        buf->f_bfree = buf->f_blocks - (root->md.fs.cblocks_used +
                                        root->md.fs.pblocks_used +
                                        root->md.fs.ablocks_used);
        dprintk("df: tot=%ld free=%ld avail=%ld(%ld-%ld-%ld) cb=%ld pb=%ld ab=%ld\n",
                (long)buf->f_blocks, (long)buf->f_bfree, (long)buf->f_bavail,
                (long)fs->free_blocks, (long)fs->clean_reserved, (long)fs->allocated_blocks,
                (long)root->md.fs.cblocks_used, (long)root->md.fs.pblocks_used,
                (long)root->md.fs.ablocks_used);

        buf->f_files = 0;
        buf->f_ffree = 0;
        buf->f_fsid.val[0] = fsid; /* FIXME */
        buf->f_namelen = 255;
        buf->f_frsize = 0;
        spin_unlock(&root->vfs_inode.i_lock);
        return 0;
}

/* FIXME we hold inode_lock while calling drop_inode, so
 * extra locking isn't really welcome....???
 */
static void lafs_drop_inode(struct inode *inode)
{
        struct fs *fs = fs_from_inode(inode);
        struct datablock *db;

        /* This lock that we now hold on the inode could prevent
         * the cleaner from getting the inode.  So after
         * we complete the drop we might need to wake the cleaner.
         */

        db = lafs_inode_get_dblock(inode, MKREF(drop));

        generic_drop_inode(inode);
        if (db && test_bit(B_Async, &db->b.flags))
                lafs_wake_thread(fs);
        if (db)
                putdref(db, MKREF(drop));
}

static struct super_operations lafs_sops = {
        .alloc_inode    = lafs_alloc_inode,
        .destroy_inode  = lafs_destroy_inode,  /* Inverse of 'alloc_inode' */
        /* Don't use read_inode */
        .dirty_inode    = lafs_dirty_inode,
        /* .write_inode not needed */
        /* put_inode ?? */
        .drop_inode     = lafs_drop_inode,
        /* drop_inode ?? */                     /* default will call delete or forget
                                                 * where 'forget' flushes and clears
                                                 */

        .clear_inode    = lafs_clear_inode,    /* forget internal state of this inode */
        .delete_inode   = lafs_delete_inode,   /* remove this inode from filesystem */
        .put_super      = lafs_put_super,
        .sync_fs        = lafs_sync_fs,
        /* write_super_lockfs ?? */
        /* unlockfs ?? */
        .statfs         = lafs_statfs,
        /* remount_fs ?? */
};

MODULE_AUTHOR("Neil Brown");
MODULE_DESCRIPTION("LaFS - Log Structured File System");
MODULE_LICENSE("GPL");
module_init(lafs_init);
module_exit(lafs_exit);
int lafs_trace = 1;
module_param(lafs_trace, int, 0644);

#ifdef DUMP
struct fs *dfs;
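/*
 * Writing one of the recognised keywords to the 'dump' module parameter
 * (e.g. "echo tree > /sys/module/lafs/parameters/dump", assuming the
 * module is built as 'lafs') triggers the corresponding debug dump.
 */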
static int do_dump(const char *val, struct kernel_param *kp)
{
        extern void lafs_dump_orphans(void);
        extern void lafs_dump_tree(void);
        extern void lafs_dump_cleanable(void);
        extern void lafs_dump_usage(void);

        printk("Want dump of %s\n", val);
        if (strncmp(val, "orphan", 6) == 0)
                lafs_dump_orphans();
        if (strncmp(val, "tree", 4) == 0)
                lafs_dump_tree();
        if (strncmp(val, "cleanable", 9) == 0)
                lafs_dump_cleanable();
        if (strncmp(val, "usage", 5) == 0)
                lafs_dump_usage();
        return 0;
}

static int get_dump(char *buffer, struct kernel_param *kp)
{
        strcpy(buffer, "orphans,tree,cleanable,usage");
        return strlen(buffer);
}

int arg;
module_param_call(dump, do_dump, get_dump, &arg, 0775);
#endif