thread.c

   1
   2 /*
   3  * fs/lafs/thread.c
   4  * Copyright (C) 2005-2010
   5  * Neil Brown <neilb@suse.de>
   6  * Released under the GPL, version 2
   7  */
   8
   9 #include "lafs.h"
  10 #include <linux/kthread.h>
  11
  12
  13 /*
  14  * This is the management thread that runs whenever the filesystem is
  15  * mounted writable.
  16  * It does a lot more than clean, though that is what it does most of.
  17  * Where possible, the thread does not block on IO, though it might
  18  * block on memory allocation.
  19  * Some tasks need to read in data from disk to complete.  These just
  20  * schedule the read and signal that they should be re-tried later
  21  * when the read might have completed.
  22  *
  23  * Such reads are marked as async and the 'struct fs' tracks how many
  24  * async reads are pending.  Tasks are retried when this number gets low.
  25  *
  26  * The particular tasks are:
  27  *  Cleaning. This goes through stages.
  28  *    choose a segment (or a collection of segments)
  29  *    Read the write-cluster header for that segment (as there can be
  30  *          multiple write clusters, we might come back here several times)
  31  *    Follow the indexes to all blocks that could credibly be in that cluster
  32  *          and load the block if it is found.
  33  *    As blocks are found, schedule them for the cleaner cluster.
  34  *    Occasionally flush the cleaner cluster.
  35  *
  36  *  Orphan handling
  37  *    Orphans are kept on 2 lists using the datablock.orphans list.
  38  *    - orphans that can be processed now
  39  *    - orphans that can be processed after some async IO has completed
  40  *    To process an orphan we call a handler based on the inode type.
  41  *    This can be TypeInodeFile (for truncates, unlinks) and
  42  *      TypeDirectory for directory cleaning.
  43  *    These will need to take an i_mutex.  If they fail, they are put on the
  44  *      delayed list and will be retried after async IO completes, or a
  45  *      time has passed.
  46  *
  47  *  Run a checkpoint
  48  *    This blocks any other tasks from running until the checkpoint
  49  *    finishes.  It will block on writing out the clusters.
  50  *    Any cleaner-segment will be flushed first
  51  *    This is triggered on a sys_sync or each time a configurable number of
   52  *    segments has been written.  In the latter case we don't start the
  53  *    checkpoint until the segments currently being cleaned are finished
  54  *    with.
  55  *
  56  *  Scan the segment usage files.
  57  *    This is a lazy scan which decays youth if needed, and looks for
  58  *    segments that should be cleaned or re-used.
  59  *
  60  *  ?? Call cluster_flush if a cluster has been pending for a while
  61  *    This really shouldn't be needed....
  62  *
  63  *
  64  * Every time we wake up, we give every task a chance to do work.
  65  * Each task is responsible for its own rate-limiting.
  66  * Each task can return a wakeup time.  We set a timeout to wake at the
  67  * soonest of these.
  68  * We may be woken sooner by another process requesting action.
  69  */
  70
  71
  72 static int lafsd(void *data)
  73 {
  74         struct fs *fs = data;
  75         long timeout = MAX_SCHEDULE_TIMEOUT;
  76         long to;
  77         set_bit(ThreadNeeded, &fs->fsstate);
  78
  79         while (!kthread_should_stop()) {
  80                 /* We need to wait INTERRUPTIBLE so that
  81                  * we don't add to the load-average.
  82                  * That means we need to be sure no signals are
  83                  * pending
  84                  */
  85                 if (signal_pending(current))
  86                         flush_signals(current);
  87
  88                 wait_event_interruptible_timeout
  89                         (fs->async_complete,
  90                          kthread_should_stop() ||
  91                          test_bit(ThreadNeeded, &fs->fsstate),
  92                          timeout);
  93                 clear_bit(ThreadNeeded, &fs->fsstate);
  94
  95                 if (test_bit(FlushNeeded, &fs->fsstate) ||
  96                     test_bit(SecondFlushNeeded, &fs->fsstate)) {
  97                         /* only push a flush now if it can happen
  98                          * immediately.
  99                          */
 100                         struct wc *wc = &fs->wc[0];
 101                         if (mutex_trylock(&wc->lock)) {
 102                                 int can_flush = 1;
 103                                 int which = (wc->pending_next + 3) % 4;
 104                                 if (wc->pending_vfy_type[which] == VerifyNext &&
 105                                     atomic_read(&wc->pending_cnt[which]) > 1)
 106                                         can_flush = 0;
 107                                 which = (which + 3) % 4;
 108                                 if (wc->pending_vfy_type[which] == VerifyNext2 &&
 109                                     atomic_read(&wc->pending_cnt[which]) > 1)
 110                                         can_flush = 0;
 111                                 mutex_unlock(&wc->lock);
 112                                 if (can_flush)
 113                                         lafs_cluster_flush(fs, 0);
 114                         }
 115                 }
 116
 117                 timeout = MAX_SCHEDULE_TIMEOUT;
 118                 to = lafs_do_checkpoint(fs);
 119                 if (to < timeout)
 120                         timeout = to;
 121
 122                 to = lafs_run_orphans(fs);
 123                 if (to < timeout)
 124                         timeout = to;
 125
 126                 to = lafs_scan_seg(fs);
 127                 if (to < timeout)
 128                         timeout = to;
 129
 130                 to = lafs_do_clean(fs);
 131                 if (to < timeout)
 132                         timeout = to;
 133
 134                 lafs_clusters_done(fs);
 135                 cond_resched();
 136         }
 137         return 0;
 138 }
 139
 140 int lafs_start_thread(struct fs *fs)
 141 {
 142         if (test_and_set_bit(ThreadRunning, &fs->fsstate))
 143                 return 0; /* already running */
 144
 145         fs->thread = kthread_run(lafsd, fs, "lafsd-%d", fs->prime_sb->s_dev);
 146         if (fs->thread == NULL)
 147                 clear_bit(ThreadRunning, &fs->fsstate);
 148         return fs->thread ? 0 : -ENOMEM;
 149 }
 150
 151 void lafs_stop_thread(struct fs *fs)
 152 {
 153         if (fs->thread)
 154                 kthread_stop(fs->thread);
 155         fs->thread = NULL;
 156 }
 157
 158 void lafs_wake_thread(struct fs *fs)
 159 {
 160         set_bit(ThreadNeeded, &fs->fsstate);
 161         wake_up(&fs->async_complete);
 162 }
 163
 164 void lafs_trigger_flush(struct block *b)
 165 {
 166         struct fs *fs = fs_from_inode(b->inode);
 167
 168         if (test_bit(B_Writeback, &b->flags) &&
 169             !test_and_set_bit(FlushNeeded, &fs->fsstate))
 170                 lafs_wake_thread(fs);
 171 }