4 * Copyright (C) 2005-2010
5 * Neil Brown <neilb@suse.de>
6 * Released under the GPL, version 2
#include <linux/err.h>
10 #include <linux/kthread.h>
14 * This is the management thread that runs whenever the filesystem is
16 * It does a lot more than clean, though that is what it does most of.
17 * Where possible, the thread does not block on IO, though it might
18 * block on memory allocation.
19 * Some tasks need to read in data from disk to complete. These just
20 * schedule the read and signal that they should be re-tried later
21 * when the read might have completed.
23 * Such reads are marked as async and the 'struct fs' tracks how many
24 * async reads are pending. Tasks are retried when this number gets low.
26 * The particular tasks are:
27 * Cleaning. This goes through stages.
28 * choose a segment (or a collection of segments)
29 * Read the write-cluster header for that segment (as there can be
30 * multiple write clusters, we might come back here several times)
31 * Follow the indexes to all blocks that could credibly be in that cluster
32 * and load the block if it is found.
33 * As blocks are found, schedule them for the cleaner cluster.
34 * Occasionally flush the cleaner cluster.
37 * Orphans are kept on 2 lists using the datablock.orphans list.
38 * - orphans that can be processed now
39 * - orphans that can be processed after some async IO has completed
40 * To process an orphan we call a handler based on the inode type.
41 * This can be TypeInodeFile (for truncates, unlinks) and
42 * TypeDirectory for directory cleaning.
43 * These will need to take an i_mutex. If they fail, they are put on the
44 * delayed list and will be retried after async IO completes, or a
48 * This blocks any other tasks from running until the checkpoint
49 * finishes. It will block on writing out the clusters.
50 * Any cleaner-segment will be flushed first
51 * This is triggered on a sys_sync or each time a configurable number of
52 * segments has been written. In the latter case we don't start the
53 * checkpoint until the segments currently being cleaned are finished
56 * Scan the segment usage files.
57 * This is a lazy scan which decays youth if needed, and looks for
58 * segments that should be cleaned or re-used.
60 * ?? Call cluster_flush if a cluster has been pending for a while
61 * This really shouldn't be needed....
64 * Every time we wake up, we give every task a chance to do work.
65 * Each task is responsible for its own rate-limiting.
66 * Each task can return a wakeup time. We set a timeout to wake at the
68 * We may be woken sooner by another process requesting action.
72 static int lafsd(void *data)
/*
 * Main management thread ("lafsd") for the filesystem.  Each pass gives
 * every maintenance task (checkpoint, orphans, segment scan, cleaning) a
 * chance to run; per the header comment, each task returns a wakeup hint
 * that bounds the next sleep.
 * NOTE(review): several lines are elided in this extract — the opening
 * brace, the declaration that binds 'fs' (presumably 'struct fs *fs =
 * data;'), the wait-queue argument of wait_event_interruptible_timeout,
 * and the closing of each if-body.  Comments below are limited to what
 * the visible code shows; confirm details against the full file.
 */
75 long timeout = MAX_SCHEDULE_TIMEOUT;
/* Request an immediate first pass through all tasks. */
77 set_bit(ThreadNeeded, &fs->fsstate);
79 while (!kthread_should_stop()) {
80 /* We need to wait INTERRUPTIBLE so that
81 * we don't add to the load-average.
82 * That means we need to be sure no signals are
/* Discard any pending signal so the interruptible wait below is
 * not cut short by it. */
85 if (signal_pending(current))
86 flush_signals(current);
/* Sleep until asked to stop, until ThreadNeeded is set, or until
 * 'timeout' expires. */
88 wait_event_interruptible_timeout
90 kthread_should_stop() ||
91 test_bit(ThreadNeeded, &fs->fsstate),
/* Awake and about to act: consume the request flag. */
93 clear_bit(ThreadNeeded, &fs->fsstate);
/* A cluster flush has been requested (FlushNeeded or
 * SecondFlushNeeded). */
95 if (test_bit(FlushNeeded, &fs->fsstate) ||
96 test_bit(SecondFlushNeeded, &fs->fsstate)) {
97 /* only push a flush now if it can happen
100 struct wc *wc = &fs->wc[0];
/* Only peek at the pending slots if the lock is free; if
 * not, skip the flush this pass rather than block. */
101 if (mutex_trylock(&wc->lock)) {
/* '(x + 3) % 4' steps back one slot in the 4-entry
 * pending ring, so start at the slot before
 * pending_next. */
103 int which = (wc->pending_next + 3) % 4;
/* pending_cnt > 1 presumably means IO is still
 * outstanding on that slot, so look one slot
 * further back — TODO confirm semantics. */
104 if (wc->pending_vfy_type[which] == VerifyNext &&
105 atomic_read(&wc->pending_cnt[which]) > 1)
107 which = (which + 3) % 4;
108 if (wc->pending_vfy_type[which] == VerifyNext2 &&
109 atomic_read(&wc->pending_cnt[which]) > 1)
111 mutex_unlock(&wc->lock);
113 lafs_cluster_flush(fs, 0);
/* Run each task in turn.  Each returns a wakeup hint 'to';
 * the code that folds 'to' into 'timeout' is elided here. */
117 timeout = MAX_SCHEDULE_TIMEOUT;
118 to = lafs_do_checkpoint(fs);
122 to = lafs_run_orphans(fs);
126 to = lafs_scan_seg(fs);
130 to = lafs_do_clean(fs);
/* NOTE(review): presumably retires clusters whose IO completed —
 * name-based inference, confirm against lafs_clusters_done(). */
134 lafs_clusters_done(fs);
140 int lafs_start_thread(struct fs *fs)
142 if (test_and_set_bit(ThreadRunning, &fs->fsstate))
143 return 0; /* already running */
145 fs->thread = kthread_run(lafsd, fs, "lafsd-%d", fs->prime_sb->s_dev);
146 if (fs->thread == NULL)
147 clear_bit(ThreadRunning, &fs->fsstate);
148 return fs->thread ? 0 : -ENOMEM;
151 void lafs_stop_thread(struct fs *fs)
/*
 * Stop the lafsd management thread; kthread_stop() blocks until the
 * thread function returns.
 * NOTE(review): lines are elided in this extract — a guard that checks/
 * clears ThreadRunning before stopping is presumably among them; confirm
 * against the full file before assuming this is unconditional.
 */
154 kthread_stop(fs->thread);
158 void lafs_wake_thread(struct fs *fs)
/*
 * Ask lafsd to make a pass now.  ThreadNeeded must be set before the
 * wake_up so that the thread's wait condition (which tests this bit)
 * observes it and does not go straight back to sleep.
 */
160 set_bit(ThreadNeeded, &fs->fsstate);
161 wake_up(&fs->async_complete);
164 void lafs_trigger_flush(struct block *b)
166 struct fs *fs = fs_from_inode(b->inode);
168 if (test_bit(B_Writeback, &b->flags) &&
169 !test_and_set_bit(FlushNeeded, &fs->fsstate))
170 lafs_wake_thread(fs);