/*
 * Copyright (C) 2005-2010
 * Neil Brown <neilb@suse.de>
 * Released under the GPL, version 2
 */
/*
 * Given a block that is in a segment that is being cleaned, mark
 * and pin it so that it gets cleaned.
 * This is written to cope with failure when allocating space, but in
 * the current design, that should never happen.
 */
static int mark_cleaning(struct block *b)
{
	int err;
	int credits;

	if (test_bit(B_Realloc, &b->flags))
		/* As cleaning is single threaded,
		 * we cannot race with another
		 * pin_dblock_clean here - the
		 * previous attempt must have succeeded.
		 */
		return 0;
	err = lafs_reserve_block(b, CleanSpace);
	if (err)
		return err;
	LAFS_BUG(!test_bit(B_Valid, &b->flags), b);
	credits = lafs_space_alloc(fs_from_inode(b->inode), 2, CleanSpace);
	if (!credits)
		return -EAGAIN;
	if (!test_and_set_bit(B_Realloc, &b->flags))
		credits--;
	if (test_bit(B_Dirty, &b->flags)) {
		/* If dirty, then now that it is pinned,
		 * it will get written, so we don't need
		 * to reallocate it.
		 */
		if (test_and_clear_bit(B_Realloc, &b->flags))
			credits++;
	}
	if (!test_and_set_bit(B_UnincCredit, &b->flags))
		credits--;
	lafs_space_return(fs_from_inode(b->inode), credits);
	lafs_refile(&b->parent->b, 0);
	return 0;
}
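
/*
 * Illustrative sketch of how this is used - it mirrors the real call
 * site in cleaner_process() below, using only names from this file:
 * the cleaner finds the first of a candidate block and its ancestors
 * that is still in the victim segment, and pins that one for
 * relocation.
 *
 *	cb = first_in_seg(&b->b, fs, tc->dev, tc->seg, MKREF(clean2));
 *	if (cb != NULL)
 *		err = mark_cleaning(cb);
 */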
/*
 * Find the first block among this one and its ancestors which
 * is in the nominated segment - if any is.
 * As the cluster header does not differentiate between
 * index blocks of different depths, we need to check them all.
 */
static struct block *first_in_seg(struct block *b, struct fs *fs,
				  int dev, u32 seg, REFARG)
{
	struct address_space *as = &b->inode->i_data;
	struct block *p;

	if (in_seg(fs, dev, seg, b->physaddr))
		return getref(b, REF);

	spin_lock(&as->private_lock);
	for (p = &(b->parent)->b;
	     p && !in_seg(fs, dev, seg, p->physaddr);
	     p = &(p->parent)->b) {
		if (test_bit(B_InoIdx, &p->flags)) {
			struct datablock *db = LAFSI(p->inode)->dblock;

			spin_unlock(&as->private_lock);
			as = &db->b.inode->i_data;
			spin_lock(&as->private_lock);
		}
	}
	if (p && test_bit(B_InoIdx, &p->flags)) {
		struct datablock *db = LAFSI(p->inode)->dblock;
		spin_unlock(&as->private_lock);
		as = &db->b.inode->i_data;
		spin_lock(&as->private_lock);
	}
	if (p)
		getref_locked(p, REF);
	spin_unlock(&as->private_lock);
	return p;
}
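
/*
 * in_seg() is not part of this excerpt.  As a reading aid, a minimal
 * sketch of the test it presumably performs - an assumption, not the
 * real helper: an address is in the segment if it falls inside the
 * segment's range of addresses on its device.
 */
static inline int in_seg_sketch(struct fs *fs, int dev, u32 seg, u64 addr)
{
	u64 start = segtovirt(fs, dev, seg);	/* first address in segment */

	return addr >= start &&
	       addr < start + fs->devs[dev].segment_size;
}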
/* To 'flush' the cleaner, anything on fs->clean_leafs needs
 * to be either allocated to a cleaning cluster or incorporated then added.
 * If the list becomes empty we flush out the cleaning cluster and return.
 * The next time the cleaner runs it will come back here and do
 * some more flushing.
 */
static void cleaner_flush(struct fs *fs)
{
	struct block *b;
	int err;

	dprintk("Start cleaner_flush\n");
	while ((b = lafs_get_flushable(fs, -1)) != NULL) {
		dprintk("cleaning %s\n", strblk(b));

		if (test_bit(B_PinPending, &b->flags)) {
			/* Cannot safely clean this now. Just mark
			 * it Dirty (it probably will be soon anyway)
			 * so it gets written to the new-data segment,
			 * which will effectively clean it.
			 */
			if (!test_and_set_bit(B_Dirty, &b->flags))
				if (!test_and_clear_bit(B_Realloc, &b->flags))
					if (!test_and_clear_bit(B_Credit, &b->flags))
						LAFS_BUG(1, b);
		}
		if (test_bit(B_Dirty, &b->flags)) {
			/* Ignore this, checkpoint will take it */
			if (test_and_clear_bit(B_Realloc, &b->flags))
				if (test_and_set_bit(B_Credit, &b->flags))
					lafs_space_return(fs, 1);
		} else if (test_bit(B_Index, &b->flags) &&
			   iblk(b)->uninc_table.pending_cnt) {
			lafs_incorporate(fs, iblk(b));
		} else
			err = lafs_cluster_allocate(b, 1);
		lafs_iounlock_block(b);
		putref(b, MKREF(flushable));
	}
	lafs_cluster_flush(fs, 1);
}
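
/*
 * The '1' passed to lafs_cluster_allocate() and lafs_cluster_flush()
 * above is taken to name the cleaner's own write cluster, as distinct
 * from the cluster used for new data - an assumption from usage here,
 * not a definition given in this file.
 */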
/*
 * Load the next cluster header for this segment and perform
 * some validity checks. If there is a failure, simply
 * update the 'tc' state so that it looks like there are
 * no more clusters to find.
 */
static void cleaner_load(struct fs *fs, struct toclean *tc)
{
	int err = lafs_load_page_async(fs, tc->chead,
				       tc->haddr);

	BUG_ON(err && err != -EAGAIN && err != -EIO);
	if (err == -EAGAIN)
		return;		/* read still in flight - come back later */
	if (err == -EIO)
		//printk("CLEANER got IO error !!\n");
		// FIXME adjust youth so as not to touch this again
		goto bad;
	tc->ch = page_address(tc->chead);
	if (memcmp(tc->ch->idtag, "LaFSHead", 8) != 0)
		goto bad;
	if (memcmp(tc->ch->uuid, fs->state->uuid, 16) != 0)
		goto bad;
	if (!tc->seq)	/* first header in this segment */
		tc->seq = le64_to_cpu(tc->ch->seq);
	if (tc->seq != le64_to_cpu(tc->ch->seq)) {
		printk("Bad seq number\n");
		goto bad;
	}
	if (lafs_calc_cluster_csum(tc->ch) != tc->ch->checksum)
		//printk("Cluster header checksum is wrong!!\n");
		goto bad;
	dprintk("try_clean: got header %d\n", (int)tc->haddr);
	return;
bad:
	tc->ch = NULL;	/* look like there are no more clusters to find */
	tc->have_addr = 0;
}
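
/*
 * Validation summary, inferred from the checks above: a segment can
 * still contain headers from an earlier life of the log, so a header
 * is only trusted if the magic tag, the filesystem uuid, the expected
 * sequence number and the checksum all match.  On any mismatch the
 * cleaner simply behaves as if the segment had no more clusters.
 */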
/* Parse a cluster header and identify blocks that might need cleaning.
 * Each time through we start parsing from the start.
 * As we find blocks, or reject inodes, we update the header so that
 * the next time through we don't try those again.
 * Once we have started IO on 16 different inodes, we take a break
 * and let some of the IO complete.
 * As we find blocks, we put them on a list to be processed later.
 */
static void cleaner_parse(struct fs *fs, struct toclean *tc)
{
	u32 bnum, inum, fsnum;
	u16 bcnt, trunc, itrunc;
	u64 next;
	struct datablock *b;
	struct inode *ino = NULL;

	/* Always start at the beginning - we invalidate entries
	 * that we have finished with.
	 */
	tc->gh = tc->ch->groups;
	tc->desc = tc->gh->u.desc;

	/* Load the index block for each described data or index block.
	 * For data blocks, get the address and possibly load the
	 * block itself.
	 * As blocks are loaded, they will be checked for membership
	 * in a current cleaning-segment and flagged for reallocation.
	 */
	while (1) {
		if ((((char *)tc->gh) - (char *)tc->ch)
		    >= le16_to_cpu(tc->ch->Hlength)) {
			/* Finished with that cluster, try another. */
			/* Need to come back later */
			next = le64_to_cpu(tc->ch->next_addr);
			if (next > tc->haddr &&
			    in_seg(fs, tc->dev, tc->seg, next)) {
				tc->haddr = next;
				tc->ch = NULL;	/* reload at the new address */
			} else
				tc->have_addr = 0;	/* segment finished */
			lafs_wake_thread(fs);
			break;
		}
		if (((((char *)tc->desc) - (char *)tc->gh) + 3) / 4
		    >= le16_to_cpu(tc->gh->group_size_words)) {
			/* Finished with that group, try another */
			/* FIXME what if the group has padding at the end?
			 * This might be fixed, but we need to be certain
			 * of all the possibilities. */
			BUG_ON(le16_to_cpu(tc->gh->group_size_words) == 0);
			tc->gh = (struct group_head *)(((char *)tc->gh) +
				 le16_to_cpu(tc->gh->group_size_words) * 4);
			tc->desc = tc->gh->u.desc;
			continue;
		}
		if (le16_to_cpu(tc->desc->block_bytes) > DescMiniOffset &&
		    le16_to_cpu(tc->desc->block_bytes) != DescIndex) {
			/* This is a miniblock, skip it. */
			int len = le16_to_cpu(tc->desc->block_bytes)
				- DescMiniOffset;
			tc->desc = (struct descriptor *)
				((char *)(tc->desc + 1) + round_up(len, 4));
			continue;
		}
		/* Ok, desc seems to be a valid descriptor in this group */
		/* Try to load the index info for block_num in inode in filesys.
		 */
		// FIXME track phys block number for comparison
		// FIXME try to optimise out based on youth and snapshot age
		bnum = le32_to_cpu(tc->desc->block_num);
		bcnt = le16_to_cpu(tc->desc->block_cnt);
		if (bcnt == 0) {
			/* This descriptor is finished with. */
			tc->desc++;
			continue;
		}
		inum = le32_to_cpu(tc->gh->inum);
		fsnum = le32_to_cpu(tc->gh->fsnum);
		trunc = le16_to_cpu(tc->gh->truncatenum_and_flag) & 0x7fff;

		if (inum == 0xFFFFFFFF &&
		    fsnum == 0xFFFFFFFF) {
			/* This group has been invalidated - skip it. */
			tc->gh = (struct group_head *)(((char *)tc->gh) +
				 le16_to_cpu(tc->gh->group_size_words) * 4);
			tc->desc = tc->gh->u.desc;
			continue;
		}
		dprintk("Cleaner looking at %d/%d %d+%d (%d)\n",
			(int)fsnum, (int)inum, (int)bnum, (int)bcnt,
			(int)le16_to_cpu(tc->desc->block_bytes));

		if (fsnum == 0 && inum == 0 && bnum == 0) {
			/* Nothing here needs cleaning. */
			tc->desc++;
			continue;
		}
		if (ino &&
		    (ino->i_ino != inum ||
		     LAFSI(ino)->filesys->i_ino != fsnum)) {
			lafs_iput_fs(ino);
			ino = NULL;
		}
		if (ino == NULL) {
			struct inode *fsino =
				lafs_iget_fs(fs, 0, fsnum, ASYNC);
			if (IS_ERR(fsino))
				ino = fsino;
			else if (LAFSI(fsino)->md.fs.creation_age > tc->seq) {
				/* Skip this inode as the filesystem
				 * was created after this cluster was
				 * written, so nothing here can be live.
				 */
				lafs_iput_fs(fsino);
				tc->gh->inum = 0xFFFFFFFF;
				tc->gh->fsnum = 0xFFFFFFFF;
				continue;
			} else {
				lafs_iput_fs(fsino);
				ino = lafs_iget_fs(fs, fsnum, inum, ASYNC);
			}
		}
		if (!IS_ERR(ino)) {
			dprintk("got the inode\n");
			/* Minor optimisation for files that have shrunk */
			/* Actually this is critical for handling truncation
			 * properly. We don't want to even 'get' a block beyond
			 * EOF, certainly not after the truncate_inode_pages.
			 */
			if (LAFSI(ino)->type == 0 ||
			    (LAFSI(ino)->type >= TypeBase &&
			     ((loff_t)bnum << ino->i_blkbits) >= i_size_read(ino))) {
				/* skip the whole descriptor */
				tc->desc++;
				continue;
			}
			itrunc = ((ino->i_generation << 8) |
				  (LAFSI(ino)->trunc_gen & 0xff)) & 0x7fff;
			if (itrunc != trunc) {
				/* The file has been truncated or replaced
				 * since this cluster was written - skip it.
				 */
				tc->desc++;
				continue;
			}
			b = lafs_get_block(ino, bnum, NULL, GFP_NOFS,
					   MKREF(cleaning));
			if (!b)
				break;
			if (!test_and_set_bit(B_Cleaning, &b->b.flags)) {
				getdref(b, MKREF(cleaning));
				if (LAFSI(ino)->type == TypeInodeFile ||
				    LAFSI(ino)->type == TypeDir) {
					/* Could become an orphan just now, so
					 * we need to protect b->cleaning.
					 */
					spin_lock(&fs->lock);
					list_move_tail(&b->cleaning,
						       &tc->cleaning);
					spin_unlock(&fs->lock);
				} else {
					/* No locking needed */
					if (list_empty(&b->cleaning))
						list_add_tail(&b->cleaning,
							      &tc->cleaning);
				}
			}
			/* We can race with truncate here, so we need to check
			 * i_size again now that b->cleaning is non-empty.
			 * The thread doing the truncate will have to lock
			 * the page holding this block, which should be enough
			 * of a barrier so that if it sets i_size after now,
			 * it will see that b->cleaning is non-empty.
			 * We need to be sure that if it sets i_size before now,
			 * we get to see the new i_size.
			 * So I think a memory barrier is a good idea...
			 */
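			/* The barrier argued for above.  Assumed to be a
			 * full smp_mb(), pairing with the page lock taken
			 * on the truncate side. */
			smp_mb();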
			if (LAFSI(ino)->type == 0 ||
			    (LAFSI(ino)->type >= TypeBase &&
			     ((loff_t)bnum << ino->i_blkbits)
			     >= i_size_read(ino))) {
				list_del_init(&b->cleaning);
				if (test_and_clear_bit(B_Cleaning, &b->b.flags))
					putdref(b, MKREF(cleaning));
			}
			putdref(b, MKREF(cleaning));
		} else {
			int err = PTR_ERR(ino);
			ino = NULL;
			dprintk("iget gives error %d\n", err);
			if (err == -EAGAIN) {
				/* The inode is still being read in; take a
				 * break and let the IO complete.
				 */
				break;
			}
			/* The inode was not found - make sure we never
			 * try this group again.
			 */
			tc->gh->inum = 0xFFFFFFFF;
			tc->gh->fsnum = 0xFFFFFFFF;
			continue;
		}
		/* We modify the descriptor in-place to track where
		 * we are up to. This is a private copy. The real
		 * descriptor doesn't change.
		 */
		tc->desc->block_num = cpu_to_le32(bnum + 1);
		tc->desc->block_cnt = cpu_to_le16(bcnt - 1);
	}
	if (ino && !IS_ERR(ino))
		lafs_iput_fs(ino);
}
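
/*
 * For reference, the on-disk layout cleaner_parse() walks, as inferred
 * from the code above (the field lists are not exhaustive):
 *
 *	struct cluster_head		idtag, uuid, seq, Hlength, next_addr
 *	  struct group_head groups[]	inum, fsnum, truncatenum_and_flag,
 *					group_size_words
 *	    struct descriptor u.desc[]	block_num, block_cnt, block_bytes
 *
 * Each group head covers the descriptors for one inode of one
 * filesystem; each descriptor covers a run of block_cnt blocks
 * starting at block_num.
 */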
/* Process all blocks that have been found to possibly need to be
 * moved from their current address (i.e. cleaned).
 * We initiate an async index lookup, then if the block really
 * is in the target segment we initiate an async read. Once
 * that is complete we mark the block for cleaning and
 * pin it.
 */
static int cleaner_process(struct fs *fs, struct toclean *tc)
{
	struct datablock *b, *tmp;
	int rv = 1;

	dprintk("start processing list\n");
	list_for_each_entry_safe(b, tmp, &tc->cleaning, cleaning) {
		struct block *cb = NULL;
		int err = lafs_find_block_async(b);
		dprintk("find_async gives %d %s\n", err, strblk(&b->b));
		if (err == -EAGAIN) {
			rv = 0;		/* must come back later */
			continue;
		}
		if (err) {
			/* Eeek, what do I do?? */
			continue;
		}
		cb = first_in_seg(&b->b, fs, tc->dev, tc->seg, MKREF(clean2));
		if (cb == NULL) {
			/* Moved, don't want this. */
			dprintk("Not in seg\n");
			goto done;
		}
		err = lafs_load_block(cb, NULL);
		if (err == 0)
			err = lafs_wait_block_async(cb);
		if (err == -EAGAIN) {
			putref(cb, MKREF(clean2));
			rv = 0;
			continue;
		}
		err = mark_cleaning(cb);
		dprintk("Want to clean %s (%d)\n", strblk(cb), err);
		if (err)
			rv = err;	/* need a flush to make progress */
		/* as cb is B_Pinned, it holds an effective
		 * ref on the inode, so it is safe to drop our
		 * ref on the datablock.
		 */
done:
		clear_bit(B_Cleaning, &b->b.flags);
		list_del_init(&b->cleaning);
		if (test_bit(B_Orphan, &b->b.flags)) {
			spin_lock(&fs->lock);
			if (test_bit(B_Orphan, &b->b.flags) &&
			    list_empty(&b->orphans)) {
				list_add(&b->orphans, &fs->pending_orphans);
				lafs_wake_thread(fs);
			}
			spin_unlock(&fs->lock);
		}
		putdref(b, MKREF(cleaning));
		if (cb)
			putref(cb, MKREF(clean2));
	}
	return rv;
}
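
/*
 * Note the async convention used above: lafs_find_block_async() and
 * lafs_wait_block_async() return -EAGAIN while IO is still in flight.
 * The entry stays on tc->cleaning, and the whole list is simply
 * re-walked on a later pass of the cleaner thread.
 */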
/*
 * Try to advance the process of cleaning the given segment.
 * This may require loading a cluster head, parsing or re-parsing
 * that head, or loading some blocks.
 */
static int try_clean(struct fs *fs, struct toclean *tc)
{
	/* Return 1 if everything has been found, -ve if we need to flush. */
	int rv;

	mutex_lock(&fs->cleaner.lock);
	dprintk("try_clean: state = %d\n", tc->ac.state);
	if (tc->ch == NULL && tc->have_addr)
		cleaner_load(fs, tc);
	if (tc->ch)
		cleaner_parse(fs, tc);
	if (!list_empty(&tc->cleaning))
		rv = cleaner_process(fs, tc);
	else
		rv = (tc->ch == NULL && !tc->have_addr &&
		      list_empty(&tc->cleaning));
	mutex_unlock(&fs->cleaner.lock);
	return rv;
}
/*
 * When we truncate a file, any block that is in the
 * process of being cleaned must have that cleaning
 * cancelled. That is done by lafs_erase_dblock calling
 * lafs_unclean().
 */
void lafs_unclean(struct datablock *db)
{
	if (!list_empty_careful(&db->cleaning)) {
		struct fs *fs = fs_from_inode(db->b.inode);
		mutex_lock(&fs->cleaner.lock);
		if (test_and_clear_bit(B_Cleaning, &db->b.flags)) {
			/* This must be on the cleaner list, so
			 * it is safe to delete without a spinlock.
			 */
			list_del_init(&db->cleaning);
			putdref(db, MKREF(cleaning));
			lafs_iput_fs(db->b.inode);
			if (test_and_clear_bit(B_Async, &db->b.flags)) {
				putdref(db, MKREF(async));
				lafs_wake_thread(fs);
			}
			if (test_bit(B_Orphan, &db->b.flags)) {
				spin_lock(&fs->lock);
				if (test_bit(B_Orphan, &db->b.flags) &&
				    list_empty(&db->orphans)) {
					list_add(&db->orphans,
						 &fs->pending_orphans);
					lafs_wake_thread(fs);
				}
				spin_unlock(&fs->lock);
			}
		}
		mutex_unlock(&fs->cleaner.lock);
	}
}
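
/*
 * A sketch of the call site described above - lafs_erase_dblock() is
 * not in this excerpt, so this is an assumption about its shape, not
 * the real code:
 *
 *	void lafs_erase_dblock(struct datablock *db)
 *	{
 *		lafs_unclean(db);
 *		...invalidate the block and free its space...
 *	}
 */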
unsigned long lafs_do_clean(struct fs *fs)
{
	/* If the cleaner is inactive, we need to decide whether to
	 * activate. This depends on the amount of free space that
	 * is tied up in dirty segments.
	 * If we decide to activate, we collect a few segments and
	 * start work on them.
	 *
	 * If the cleaner is active, we simply try to progress any activity.
	 * Any activity may trigger an async read, in which case we
	 * leave it and move on.
	 *
	 * (Most of this happens in try_clean())
	 * - If we have chosen a segment/cluster but haven't loaded the
	 *   cluster head, load the cluster head.
	 * - If we have loaded a cluster head but haven't processed all
	 *   of it, process some more into a list of block addresses.
	 * - If we have loaded a cluster head and have processed all
	 *   of it, select the next cluster in the segment, or forget
	 *   this segment.
	 * - If we have un-processed block addresses in some snapshots,
	 *   try to find the current addresses of those blocks.
	 * - If we found blocks in the current segment, read them in.
	 * - If we have them read in, mark them for reallocation.
	 * - If all done, process realloc_leafs and allocate to the
	 *   clean cluster.
	 */
	if (!fs->cleaner.active &&
	    !test_bit(CheckpointNeeded, &fs->fsstate) &&
	    !test_bit(CleanerDisabled, &fs->fsstate)) {
		/* Choose to clean when the fraction of all space that is clean
		 * is below the fraction of free space that is not clean,
		 * i.e. if T is total space, C is clean space, F is free space,
		 * then clean when C/T < (F-C)/F.
		 * So as the amount of clean space decreases, we become less
		 * tolerant of unavailable free space.
		 * Avoiding division, this is
		 *	C * F < T * (F - C)
		 * As we always reserve 3 clean segments for accounting overhead
		 * and 1 to ensure we can handle deletions, we exclude those
		 * clean segments from the calculations,
		 * i.e. subtract 4 segments from T, C and F.
		 *
		 * T we know from the size of the devices.
		 * C we know by counting the clean segments.
		 * F we count each time we scan the segments (total_free).
		 * We use the larger count of the last pass and this pass.
		 *
		 * We need to avoid cleaning too much in one checkpoint as
		 * the free counts will start to get misleading.
		 * Maybe every time we choose to clean a segment, we should
		 * add the size of the segment to some counter and add that
		 * to C in the above calculations.
		 *
		 * For now, clean up to 4 segments at a time.
		 */
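		/* A worked example with illustrative numbers only: with
		 * T = 1000, C = 100 and F = 300 (segments, after the
		 * 4-segment adjustment), C * F = 30000 and
		 * T * (F - C) = 200000, so C * F < T * (F - C) and we
		 * clean.  If clean space grows to C = 250, C * F = 75000
		 * exceeds T * (F - C) = 50000, and we stop cleaning.
		 */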
		u64 T = 0;
		int max_segs;
		int i;
		int force_checkpoint_after_clean = 0;

		for (i = 0; i < fs->devices; i++)
			T += fs->devs[i].size;
		T -= TOTAL_RESERVED * fs->max_segment;

		max_segs = lafs_alloc_cleaner_segs(fs, CLEANER_SEGS);

		/* If we can only clean to the main segment, we might
		 * tie up the very space we are trying to recover, so:
		 * - only do one segment at a time
		 * - only if there are no clean (but not yet free) segments
		 * - if CleanerBlocks, then clean.
		 */
		if (max_segs == 0) {
			if (fs->segtrack->clean.cnt == 0
			    && test_bit(CleanerBlocks, &fs->fsstate)) {
				max_segs = 1;
				force_checkpoint_after_clean = 1;
			}
		}
		for (i = 0; i < max_segs; i++) {
			struct toclean *tc = &fs->cleaner.seg[i];
			u64 C = fs->free_blocks + fs->clean_reserved
				+ fs->cleaner.cleaning;
			u64 F = max(fs->total_free, fs->total_free_prev);

			if (TOTAL_RESERVED * fs->max_segment < C)
				/* adjust to unusable space FIXME adjust F too? */
				C -= TOTAL_RESERVED * fs->max_segment;
			else
				C = 0;
			if (TOTAL_RESERVED * fs->max_segment < F)
				/* adjust to unusable space */
				F -= TOTAL_RESERVED * fs->max_segment;
			else
				F = 0;

			dprintk("C=%llu F=%llu T=%llu\n", C, F, T);
			if ((F < C || C * F >= T * (F - C)) &&
			    !test_bit(EmergencyClean, &fs->fsstate) &&
			    !test_bit(EmergencyPending, &fs->fsstate) &&
			    !test_bit(CleanerBlocks, &fs->fsstate)) {
				dprintk("CLEANER: enough cleaning with %d segments\n",
					i);
				break;
			}
			if (tc->chead == NULL)
				tc->chead = alloc_page(GFP_KERNEL);

			/* OK, we are good to keep cleaning */
			tc->have_addr = lafs_get_cleanable(fs, &tc->dev,
							   &tc->seg);
			if (!tc->have_addr) {
				dprintk("CLEANER: Nothing found to clean at %d :-(\n",
					i);
				if (i == 0 &&
				    (test_bit(EmergencyPending, &fs->fsstate) ||
				     test_bit(CleanerBlocks, &fs->fsstate)))
					lafs_checkpoint_start(fs);
				break;
			}
			printk("CLEANER: clean %d/%d\n", tc->dev, tc->seg);
			fs->cleaner.cleaning += fs->devs[tc->dev].segment_size /* - 1*/;
			tc->haddr = segtovirt(fs, tc->dev, tc->seg);
			tc->ch = NULL;
			tc->seq = 0;
			INIT_LIST_HEAD(&tc->cleaning);
			fs->cleaner.active = 1;
			if (force_checkpoint_after_clean)
				lafs_checkpoint_start(fs);
		}
		if (i == CLEANER_SEGS)
			dprintk("CLEANER: found %d segments to clean\n", i);
	}
	if (fs->cleaner.active) {
		int i, doflush = 0, alldone = 1;

		for (i = 0; i < CLEANER_SEGS ; i++) {
			struct toclean *tc = &fs->cleaner.seg[i];
			if (tc->have_addr || !list_empty(&tc->cleaning)) {
				/* Might be something to do here */
				int done = try_clean(fs, tc);
				if (done < 0)
					doflush = 1;
				if (done == 0 && doflush == 1)
					cleaner_flush(fs);
				if (done != 1)
					alldone = 0;
			}
		}
		if (alldone) {
			fs->cleaner.active = 0;
			lafs_wake_thread(fs);
		}
	}
	if (test_bit(CleanerBlocks, &fs->fsstate)) {
		int any_clean;
		int clean = lafs_clean_count(fs, &any_clean);

		dprintk("clean=%d max_seg=%d need=%d act=%d any=%d\n",
			(int)clean, (int)fs->max_segment,
			(int)fs->cleaner.need, fs->cleaner.active, any_clean);
		/* If there is enough clean space for everything to move
		 * forward, or the cleaner has done all it can, then
		 * push out a checkpoint so threads waiting on the cleaner
		 * can proceed.
		 */
		if (clean * fs->max_segment
		    >= fs->allocated_blocks + fs->cleaner.need
		    || !any_clean)
			lafs_checkpoint_start(fs);
	}
	return MAX_SCHEDULE_TIMEOUT;
}