/*
 * fs/lafs/clean.c
 * Copyright (C) 2005-2010
 * Neil Brown <neilb@suse.de>
 * Released under the GPL, version 2
 */

#include "lafs.h"

/* mark_cleaning
 * Given a block that is in a segment that is being cleaned, mark
 * and pin it so that it gets cleaned.
 * This is written to cope with failure when allocating space, but in
 * the current design, that should never happen.
 */
static int mark_cleaning(struct block *b)
{
        int err;
        int credits;

        if (test_bit(B_Realloc, &b->flags))
                /* As cleaning is single threaded,
                 * we cannot race with another
                 * pin_dblock_clean here - the
                 * previous attempt must have succeeded
                 */
                return 0;
        err = lafs_reserve_block(b, CleanSpace);
        if (err)
                return err;
        LAFS_BUG(!test_bit(B_Valid, &b->flags), b);
        credits = lafs_space_alloc(fs_from_inode(b->inode), 2, CleanSpace);
        if (credits == 0)
                return -EAGAIN;
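        /* Of the two credits allocated above, one may be consumed by
         * setting B_Realloc here and one by setting B_UnincCredit
         * below; whatever is left over is handed back through
         * lafs_space_return().
         */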
        if (!test_and_set_bit(B_Realloc, &b->flags))
                credits--;

        lafs_pin_block(b);
        if (test_bit(B_Dirty, &b->flags)) {
                /* If dirty, then now that it is pinned,
                 * it will get written, so don't need
                 * Realloc
                 */
                if (test_and_clear_bit(B_Realloc, &b->flags))
                        credits++;
        }

        if (!test_and_set_bit(B_UnincCredit, &b->flags))
                credits--;
        lafs_space_return(fs_from_inode(b->inode), credits);

        lafs_refile(b, 0);
        lafs_refile(&b->parent->b, 0);
        return 0;
}

/* first_in_seg
 * Find the first block, among this one and its ancestors, which
 * is in the nominated segment - if any is.
 * As the cluster header does not differentiate between
 * index blocks of different depths, we need to check them
 * all.
 */
static struct block *first_in_seg(struct block *b, struct fs *fs,
                                  int dev, u32 seg, REFARG)
{
        struct address_space *as = &b->inode->i_data;
        struct block *p;
        if (in_seg(fs, dev, seg, b->physaddr))
                return getref(b, REF);

        spin_lock(&as->private_lock);
        for (p = b;
             p && !in_seg(fs, dev, seg, p->physaddr);
             p = &(p->parent)->b) {
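                /* An InoIdx block's ->parent chain continues in the
                 * address space of the owning inode's data block, so
                 * switch to that mapping's private_lock before
                 * following the chain any further.
                 */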
                if (test_bit(B_InoIdx, &p->flags)) {
                        struct datablock *db = LAFSI(p->inode)->dblock;

                        spin_unlock(&as->private_lock);
                        as = &db->b.inode->i_data;
                        spin_lock(&as->private_lock);
                }
        }
        if (p && test_bit(B_InoIdx, &p->flags)) {
                struct datablock *db = LAFSI(p->inode)->dblock;
                spin_unlock(&as->private_lock);
                as = &db->b.inode->i_data;
                spin_lock(&as->private_lock);
                p = &db->b;
        }

        if (p)
                getref_locked(p, REF);

        spin_unlock(&as->private_lock);
        return p;
}

/* To 'flush' the cleaner, anything on fs->clean_leafs needs either
 * to be allocated to a cleaning-cluster, or to be incorporated and
 * then added.
 * When the list becomes empty we flush out the cleaning cluster and
 * return.  The next time the cleaner runs it will come back here and
 * do some more flushing.
 */
static void cleaner_flush(struct fs *fs)
{
        struct block *b;
        int err = 0;
        dprintk("Start cleaner_flush\n");
        while (!err &&
               (b = lafs_get_flushable(fs, -1)) != NULL) {
                int unlock = 1;

                dprintk("cleaning %s\n", strblk(b));

                if (test_bit(B_PinPending, &b->flags)) {
                        /* Cannot safely clean this now.  Just mark
                         * it Dirty (it probably will be soon anyway)
                         * so it gets written to the new-data segment
                         * which will effectively clean it.
                         */
                        if (!test_and_set_bit(B_Dirty, &b->flags))
                                if (!test_and_clear_bit(B_Realloc, &b->flags))
                                        if (!test_and_clear_bit(B_Credit, &b->flags))
                                                LAFS_BUG(1, b);
                }
                if (test_bit(B_Dirty, &b->flags)) {
                        /* Ignore this, checkpoint will take it */
                        if (test_and_clear_bit(B_Realloc, &b->flags))
                                if (test_and_set_bit(B_Credit, &b->flags))
                                        lafs_space_return(fs, 1);
                } else if (test_bit(B_Index, &b->flags) &&
                           (iblk(b)->uninc ||
                            iblk(b)->uninc_table.pending_cnt)) {
                        lafs_incorporate(fs, iblk(b));
                } else {
                        err = lafs_cluster_allocate(b, 1);
                        unlock = 0;
                }
                if (unlock)
                        lafs_iounlock_block(b);
                putref(b, MKREF(flushable));
        }
        lafs_cluster_flush(fs, 1);
}

/*
 * Load the next cluster header for this segment and perform
 * some validity checks.  If there is a failure, simply
 * update the 'tc' state so that it looks like there are
 * no more clusters to find.
 */
static void cleaner_load(struct fs *fs, struct toclean *tc)
{
        int err;
        err = lafs_load_page_async(fs, tc->chead,
                                   tc->haddr,
                                   (PAGE_SIZE /
                                    fs->blocksize),
                                   &tc->ac);

        BUG_ON(err && err != -EAGAIN && err != -EIO);

        if (err == -EAGAIN)
                return;
        if (err)
                //printk("CLEANER got IO error !!\n");
                // FIXME adjust youth so as not to touch this again
                goto bad_header;

        tc->ch = page_address(tc->chead);
        if (memcmp(tc->ch->idtag, "LaFSHead", 8) != 0)
                goto bad_header;
        if (memcmp(tc->ch->uuid, fs->state->uuid, 16) != 0)
                goto bad_header;
        if (tc->seq == 0)
                tc->seq = le64_to_cpu(tc->ch->seq);
        else {
                tc->seq++;
                if (tc->seq != le64_to_cpu(tc->ch->seq)) {
                        printk("Bad seq number\n");
                        goto bad_header;
                }
        }
        if (lafs_calc_cluster_csum(tc->ch) != tc->ch->checksum) {
                //printk("Cluster header checksum is wrong!!\n");
                goto bad_header;
        }
        dprintk("try_clean: got header %d\n", (int)tc->haddr);
        return;

bad_header:
        tc->ac.state = 0;
        tc->have_addr = 0;
}

/* Parse a cluster header and identify blocks that might need cleaning.
 * Each time through we start parsing from the start.
 * As we find blocks, or reject inodes, we update the header so that
 * the next time through we don't try those again.
 * Once we have started IO on 16 different inodes, we take a break
 * and let some of the IO complete.
 * As we find blocks, we put them on a list to be processed later.
 */
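/* Rough sketch of the header layout as the parser below walks it
 * (field names taken from the accesses in this file; this is a
 * reading aid, not a full format description):
 *
 *   cluster header (tc->ch): idtag "LaFSHead", uuid, seq, checksum,
 *     Hlength (total header bytes), next_addr, then group_heads;
 *   group_head (tc->gh): inum, fsnum, truncatenum_and_flag,
 *     group_size_words (group length in 32-bit words), then descriptors;
 *   descriptor (tc->desc): block_num, block_cnt, block_bytes
 *     (block_bytes > DescMiniOffset marks an inline 'miniblock').
 */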
static void cleaner_parse(struct fs *fs, struct toclean *tc)
{
        u32 bnum;
        int bcnt;
        u32 inum, fsnum;
        u16 trunc;
        struct inode *ino = NULL;
        int again = 0;

        /* Always start at the beginning - we invalidate entries
         * that we have finished with
         */
        tc->gh = tc->ch->groups;
        tc->desc = tc->gh->u.desc;

        while (again < 16) {
                /* Load the index block for each described data or index block.
                 * For data blocks, get the address and possibly load the
                 * data block too.
                 * As blocks are loaded, they will be checked for membership
                 * in a current cleaning-segment and flagged for reallocation
                 * if appropriate.
                 */
                if ((((char *)tc->gh) - (char *)tc->ch)
                    >= le16_to_cpu(tc->ch->Hlength)) {
                        /* Finished with that cluster, try another. */
                        u64 next;
                        if (again)
                                /* Need to come back later */
                                break;
                        next = le64_to_cpu(tc->ch->next_addr);
                        if (next > tc->haddr &&
                            in_seg(fs, tc->dev, tc->seg, next)) {
                                tc->haddr = next;
                                tc->ac.state = 1;
                        } else {
                                tc->ac.state = 0;
                                tc->have_addr = 0;
                        }
                        tc->ch = NULL;
                        lafs_wake_thread(fs);
                        break;
                }
                if (((((char *)tc->desc) - (char *)tc->gh)+3)/4
                    >= le16_to_cpu(tc->gh->group_size_words)) {
                        /* Finished with that group, try another */
                        /* FIXME what if group has padding at end?
                         * this might be fixed, but need to be certain
                         * of all possibilities. */
                        BUG_ON(le16_to_cpu(tc->gh->group_size_words) == 0);
                        tc->gh = (struct group_head *)(((char *)tc->gh) +
                                                       le16_to_cpu(tc->gh->group_size_words)*4);
                        tc->desc = tc->gh->u.desc;
                        continue;
                }
                if (le16_to_cpu(tc->desc->block_bytes) > DescMiniOffset &&
                    tc->desc->block_bytes != DescIndex) {
                        /* This is a miniblock, skip it. */
                        int len = le16_to_cpu(tc->desc->block_bytes)
                                - DescMiniOffset;
                        tc->desc++;
                        tc->desc = (struct descriptor *)
                                (((char *)tc->desc)
                                 + roundup(len, 4));
                        continue;
                }
                /* Ok, desc seems to be a valid descriptor in this group */
                /* Try to load the index info for block_num in inode in filesys.
                 */
                // FIXME track phys block number for comparison
                // FIXME try to optimise out based on youth and snapshot age

                bnum = le32_to_cpu(tc->desc->block_num);
                bcnt = le16_to_cpu(tc->desc->block_cnt);
                if (bcnt == 0) {
                        tc->desc++;
                        continue;
                }
                inum = le32_to_cpu(tc->gh->inum);
                fsnum = le32_to_cpu(tc->gh->fsnum);
                trunc = le16_to_cpu(tc->gh->truncatenum_and_flag) & 0x7fff;

                if (inum == 0xFFFFFFFF &&
                    fsnum == 0xFFFFFFFF) {
                        tc->desc++;
                        continue;
                }
                dprintk("Cleaner looking at %d/%d %d+%d (%d)\n",
                        (int)fsnum, (int)inum, (int)bnum, (int)bcnt,
                        (int)le16_to_cpu(tc->desc->block_bytes));

                if (fsnum == 0 && inum == 0 && bnum == 0)
                        goto skip;

                if (ino == NULL ||
                    ino->i_ino != inum ||
                    LAFSI(ino)->filesys->i_ino != fsnum) {
                        if (ino)
                                lafs_iput_fs(ino);
                        if (fsnum) {
                                struct inode *fsino =
                                        lafs_iget_fs(fs, 0, fsnum, ASYNC);
                                if (IS_ERR(fsino))
                                        ino = fsino;
                                else if (LAFSI(fsino)->md.fs.creation_age > tc->seq) {
                                        /* skip this inode as filesystem
                                         * is newer
                                         */
                                        lafs_iput_fs(fsino);
                                        ino = NULL;
                                        goto skip_inode;
                                } else
                                        lafs_iput_fs(fsino);
                        }
                        ino = lafs_iget_fs(fs, fsnum, inum, ASYNC);
                }
                if (!IS_ERR(ino)) {
                        int itrunc;
                        struct datablock *b;
                        dprintk("got the inode\n");
                        /* Minor optimisation for files that have shrunk */
                        /* Actually this is critical for handling truncation
                         * properly.  We don't want to even 'get' a block beyond
                         * EOF, certainly not after the truncate_inode_pages.
                         */
                        if (LAFSI(ino)->type == 0 ||
                            (LAFSI(ino)->type >= TypeBase &&
                             ((loff_t)bnum << ino->i_blkbits) >= i_size_read(ino))) {
                                /* skip the whole descriptor */
                                bcnt = 1;
                                goto skip;
                        }
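                        /* The 15-bit truncation tag packs the low
                         * 7 bits of i_generation above the 8-bit
                         * trunc_gen, matching the 'trunc' value taken
                         * from the group header above.
                         */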
                        itrunc = ((ino->i_generation<<8) |
                                  (LAFSI(ino)->trunc_gen & 0xff)) & 0x7fff;
                        if (itrunc != trunc) {
                                /* file has been truncated or replaced since
                                 * this cluster
                                 */
                                goto skip_inode;
                        }
                        b = lafs_get_block(ino, bnum, NULL, GFP_NOFS,
                                           MKREF(cleaning));
                        if (b == NULL)
                                break;

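                        /* The first time a block is flagged B_Cleaning
                         * it gains a reference on both the block and
                         * its inode; cleaner_process() and
                         * lafs_unclean() drop them again.
                         */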
                        if (!test_and_set_bit(B_Cleaning, &b->b.flags)) {
                                getdref(b, MKREF(cleaning));
                                lafs_igrab_fs(ino);
                        }
                        if (LAFSI(ino)->type == TypeInodeFile ||
                            LAFSI(ino)->type == TypeDir) {
                                /* Could become an orphan just now, so need
                                 * to protect b->cleaning
                                 */
                                spin_lock(&fs->lock);
                                list_move_tail(&b->cleaning, &tc->cleaning);
                                spin_unlock(&fs->lock);
                        } else {
                                /* No locking needed */
                                if (list_empty(&b->cleaning))
                                        list_add_tail(&b->cleaning, &tc->cleaning);
                        }

                        /* We can race with truncate here, so need to check
                         * i_size again now that b->cleaning is non-empty.
                         * The thread doing the truncate will have to lock
                         * the page holding this block, which should be enough
                         * of a barrier so that if it sets i_size after now,
                         * it will see that b->cleaning is non-empty.
                         * We need to be sure that if it sets it before now,
                         * we get to see i_size.
                         * So I think a memory barrier is a good idea...
                         */
                        mb();

                        if (LAFSI(ino)->type == 0 ||
                            (LAFSI(ino)->type >= TypeBase &&
                             ((loff_t)bnum << ino->i_blkbits)
                             >= i_size_read(ino))) {
                                list_del_init(&b->cleaning);
                                if (test_and_clear_bit(B_Cleaning, &b->b.flags)) {
                                        putdref(b, MKREF(cleaning));
                                        lafs_iput_fs(ino);
                                }
                        }
                        putdref(b, MKREF(cleaning));
                } else {
                        int err = PTR_ERR(ino);
                        ino = NULL;
                        dprintk("iget gives error %d\n", err);
                        if (err == -EAGAIN) {
                                again++;
                                tc->desc++;
                                continue;
                        }
                        /* inode not found, make sure we never
                         * look for it again
                         */
                skip_inode:
                        tc->gh->inum = 0xFFFFFFFF;
                        tc->gh->fsnum = 0xFFFFFFFF;
                        tc->desc++;
                        continue;
                }
        skip:
                /* We modify the descriptor in-place to track where
                 * we are up to.  This is a private copy.  The real
                 * descriptor doesn't change.
                 */
                tc->desc->block_num = cpu_to_le32(bnum+1);
                tc->desc->block_cnt = cpu_to_le16(bcnt-1);
        }
        if (ino)
                lafs_iput_fs(ino);
}

/* Process all blocks that have been found to possibly need to be
 * moved from their current address (i.e. cleaned).
 * We initiate an async index lookup, then if the block really
 * is in the target segment we initiate an async read.  Once
 * that is complete we mark the block for cleaning and
 * release it.
 */
static int cleaner_process(struct fs *fs, struct toclean *tc)
{
        struct datablock *b, *tmp;
        int rv = 0;
        struct inode *ino;

        dprintk("start processing list\n");
        list_for_each_entry_safe(b, tmp, &tc->cleaning, cleaning) {
                struct block *cb = NULL;
                int err = lafs_find_block_async(b);
                dprintk("find_async gives %d %s\n", err, strblk(&b->b));
                if (err == -EAGAIN)
                        continue;
                if (err)
                        /* Eeek, what do I do?? */
                        goto done_cleaning;

                cb = first_in_seg(&b->b, fs, tc->dev, tc->seg, MKREF(clean2));

                if (cb == NULL) {
                        /* Moved, don't want this. */
                        dprintk("Not in seg\n");
                        goto done_cleaning;
                }
                err = lafs_load_block(cb, NULL);
                if (err)
                        goto done_cleaning;

                err = lafs_wait_block_async(cb);
                if (err == -EAGAIN) {
                        putref(cb, MKREF(clean2));
                        continue;
                }
                if (err)
                        goto done_cleaning;

                err = mark_cleaning(cb);
                dprintk("Want to clean %s (%d)\n",
                        strblk(cb), err);
                if (err)
                        rv = -1;

                /* as cb is B_Pinned, it holds an effective
                 * ref on the inode, so it is safe to drop our
                 * ref now
                 */
        done_cleaning:
                clear_bit(B_Cleaning, &b->b.flags);

                list_del_init(&b->cleaning);
                if (test_bit(B_Orphan, &b->b.flags)) {
                        spin_lock(&fs->lock);
                        if (test_bit(B_Orphan, &b->b.flags) &&
                            list_empty(&b->orphans)) {
                                list_add(&b->orphans, &fs->pending_orphans);
                                lafs_wake_thread(fs);
                        }
                        spin_unlock(&fs->lock);
                }

                ino = b->b.inode;
                putdref(b, MKREF(cleaning));
                lafs_iput_fs(ino);
                putref(cb, MKREF(clean2));
                if (rv)
                        break;
        }
        return rv;
}

/*
 * Try to advance the process of cleaning the given segment.
 * This may require loading a cluster head, parsing or reparsing
 * that head, or loading some blocks.
 */
static int try_clean(struct fs *fs, struct toclean *tc)
{
        /* return 1 if everything has been found, -ve if we need to flush */
        int rv = 0;

        mutex_lock(&fs->cleaner.lock);
        dprintk("try_clean: state = %d\n", tc->ac.state);
        if (tc->ch == NULL && tc->have_addr)
                cleaner_load(fs, tc);

        if (tc->ch)
                cleaner_parse(fs, tc);

        if (!list_empty(&tc->cleaning))
                rv = cleaner_process(fs, tc);
        if (!rv)
                rv = (tc->ch == NULL && !tc->have_addr &&
                      list_empty(&tc->cleaning));
        mutex_unlock(&fs->cleaner.lock);
        return rv;
}

/*
 * When we truncate a file, any block that is in the
 * process of being cleaned must have that cleaning
 * cancelled.  That is done by lafs_erase_dblock calling
 * lafs_unclean.
 */
void lafs_unclean(struct datablock *db)
{
        if (!list_empty_careful(&db->cleaning)) {
                struct fs *fs = fs_from_inode(db->b.inode);
                mutex_lock(&fs->cleaner.lock);
                if (test_and_clear_bit(B_Cleaning, &db->b.flags)) {
                        /* This must be on the cleaner list, so
                         * it is safe to delete without a spinlock
                         */
                        list_del_init(&db->cleaning);
                        putdref(db, MKREF(cleaning));
                        lafs_iput_fs(db->b.inode);
                        if (test_and_clear_bit(B_Async, &db->b.flags)) {
                                putdref(db, MKREF(async));
                                lafs_wake_thread(fs);
                        }
                        if (test_bit(B_Orphan, &db->b.flags)) {
                                spin_lock(&fs->lock);
                                if (test_bit(B_Orphan, &db->b.flags) &&
                                    list_empty(&db->orphans)) {
                                        list_add(&db->orphans, &fs->pending_orphans);
                                        lafs_wake_thread(fs);
                                }
                                spin_unlock(&fs->lock);
                        }
                }
                mutex_unlock(&fs->cleaner.lock);
        }
}

unsigned long lafs_do_clean(struct fs *fs)
{
        /*
         * If the cleaner is inactive, we need to decide whether to
         * activate it.  This depends on the amount of free space that
         * is tied up in dirty segments.
         * If we decide to activate, we collect a few segments and
         * start work on them.
         *
         * If the cleaner is active we simply try to progress any activity.
         * Any activity may trigger an async read, in which case we
         * leave it and move on.
         *
         * (Most of this happens in try_clean().)
         * - If we have chosen a segment/cluster but haven't loaded the
         *   cluster head, load the cluster head.
         * - If we have loaded a cluster head but haven't processed all
         *   of it, process some more into a list of block addresses.
         * - If we have loaded a cluster head and have processed all
         *   of it, select the next cluster in the segment, or forget
         *   the segment.
         * - If we have un-processed block addresses in some snapshots,
         *   try to find the current addresses of those blocks.
         * - If we found blocks in the current segment, read them in.
         * - If we have them read in, mark them for reallocation.
         * - If all done, process realloc_leafs and allocate to a clean
         *   cluster.
         */
        if (!fs->cleaner.active &&
            !test_bit(CheckpointNeeded, &fs->fsstate) &&
            !test_bit(CleanerDisabled, &fs->fsstate)) {
                /* Choose to clean when the fraction of all space that is clean
                 * is below the fraction of free space that is not clean.
                 * i.e. if T is total space, C is clean space, F is free space,
                 * then clean when C/T < (F-C)/F.
                 * So as the amount of clean space decreases we become less
                 * tolerant of unavailable free space.
                 * Avoiding division, this is
                 *       C * F < T * (F - C)
                 * As we always reserve 3 clean segments for accounting overhead
                 * and 1 to ensure we can handle deletions, we exclude those
                 * clean segments from the calculations,
                 * i.e. subtract 4 segments from T, C and F.
                 *
                 * T we know from the size of the devices.
                 * C we know by counting the clean segments.
                 * F we count each time we scan the segments (total_free).
                 *   We use the larger count of the last pass and this pass.
                 *
                 * We need to avoid cleaning too much in one checkpoint as
                 * the free counts will start to get misleading.
                 * Maybe every time we choose to clean a segment, we should
                 * add the size of the segment to some counter and add that
                 * to C in the above calculations.
                 *
                 * For now, clean up to 4 segments at a time.
                 */
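                /* A worked instance of the rule (numbers invented for
                 * illustration): with T=1000, C=100, F=300 we have
                 * C*F = 30000 < T*(F-C) = 200000, so cleaning starts;
                 * with C=300, F=400 we have C*F = 120000 >= T*(F-C)
                 * = 100000, so there is already enough clean space.
                 */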
                int i, max_segs;
                u64 T = 0;
                int force_checkpoint_after_clean = 0;

                for (i = 0; i < fs->devices; i++)
                        T += fs->devs[i].size;

                T -= TOTAL_RESERVED * fs->max_segment;

                max_segs = lafs_alloc_cleaner_segs(fs, CLEANER_SEGS);
                if (max_segs < 1) {
                        /* If we can only clean to the main segment, we may
                         * have to.  However:
                         *  - only do one segment at a time;
                         *  - only if there are no clean (but not yet free)
                         *    segments;
                         *  - if CleanerBlocks is set, then clean;
                         *  otherwise don't.
                         */
                        if (fs->segtrack->clean.cnt == 0
                            && test_bit(CleanerBlocks, &fs->fsstate)) {
                                max_segs = 1;
                                force_checkpoint_after_clean = 1;
                        }
                }
                for (i = 0; i < max_segs; i++) {
                        struct toclean *tc = &fs->cleaner.seg[i];
                        u64 C = fs->free_blocks + fs->clean_reserved
                                + fs->cleaner.cleaning;
                        u64 F = max(fs->total_free, fs->total_free_prev);

                        if (TOTAL_RESERVED * fs->max_segment < C)
                                /* adjust for unusable space;
                                 * FIXME adjust F too? */
                                C -= TOTAL_RESERVED * fs->max_segment;
                        else
                                C = 0;
                        if (TOTAL_RESERVED * fs->max_segment < F)
                                /* adjust for unusable space */
                                F -= TOTAL_RESERVED * fs->max_segment;
                        else
                                F = 0;

                        dprintk("C=%llu F=%llu T=%llu\n", C, F, T);
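                        /* Keep cleaning only while the start condition
                         * C * F < T * (F - C) still holds; the F < C
                         * test guards the unsigned subtraction F - C.
                         */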
                        if ((F < C || C * F >= T * (F - C)) &&
                            !test_bit(EmergencyClean, &fs->fsstate) &&
                            !test_bit(EmergencyPending, &fs->fsstate) &&
                            !test_bit(CleanerBlocks, &fs->fsstate)) {
                                dprintk("CLEANER: enough cleaning with %d segments\n",
                                        i);
                                break;
                        }

                        if (tc->chead == NULL)
                                continue;

                        /* OK, we are good to keep cleaning */
                        tc->have_addr = lafs_get_cleanable(fs, &tc->dev, &tc->seg);
                        if (!tc->have_addr) {
                                dprintk("CLEANER: Nothing found to clean at %d :-(\n",
                                        i);
                                if (i == 0 && (test_bit(EmergencyPending, &fs->fsstate) ||
                                               test_bit(CleanerBlocks, &fs->fsstate)))
                                        lafs_checkpoint_start(fs);
                                break;
                        }
                        printk("CLEANER: clean %d/%d\n", tc->dev, tc->seg);
                        fs->cleaner.cleaning += fs->devs[tc->dev].segment_size /* - 1*/;
                        tc->haddr = segtovirt(fs, tc->dev, tc->seg);
                        tc->gh = NULL;
                        tc->desc = NULL;
                        tc->ac.state = 1;
                        tc->seq = 0;
                        INIT_LIST_HEAD(&tc->cleaning);
                        fs->cleaner.active = 1;
                        if (force_checkpoint_after_clean)
                                lafs_checkpoint_start(fs);
                }
                if (i == CLEANER_SEGS)
                        dprintk("CLEANER: found %d segments to clean\n", i);
        }
        if (fs->cleaner.active) {
                int cnt = 0;
                int i;
                int doflush = 1;
                for (i = 0; i < CLEANER_SEGS; i++) {
                        struct toclean *tc = &fs->cleaner.seg[i];
                        if (tc->have_addr || !list_empty(&tc->cleaning)) {
                                /* Might be something to do here */
                                int done = try_clean(fs, tc);
                                if (done < 0)
                                        doflush = 2;
                                if (done == 0 && doflush == 1)
                                        doflush = 0;
                                cnt++;
                        }
                }
                if (doflush)
                        cleaner_flush(fs);
                if (cnt == 0) {
                        fs->cleaner.active = 0;
                        lafs_wake_thread(fs);
                }
        }
        if (test_bit(CleanerBlocks, &fs->fsstate)) {
                int any_clean;
                int clean = lafs_clean_count(fs, &any_clean);
                dprintk("clean=%d max_seg=%d need=%d act=%d any=%d\n",
                        (int)clean, (int)fs->max_segment,
                        (int)fs->cleaner.need, fs->cleaner.active, any_clean);
                if (any_clean) {
                        /* If there is enough clean space for everything to move
                         * forward, or the cleaner has done all it can, then
                         * push out a checkpoint so threads waiting on the cleaner
                         * can proceed
                         */
                        if (clean * fs->max_segment
                            >= fs->allocated_blocks + fs->cleaner.need
                            ||
                            !fs->cleaner.active)
                                lafs_checkpoint_start(fs);
                }
        }
        return MAX_SCHEDULE_TIMEOUT;
}