/*
 * raid1.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1999, 2000 Ingo Molnar, Red Hat
 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID-1 management functions.
 *
 * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
 *
 * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk>
 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/raid/raid1.h>
#include <asm/atomic.h>

#define MAJOR_NR MD_MAJOR
#define MD_PERSONALITY
/* per-mirror sector budget (sect_limit) before read balancing
 * deliberately kicks the read over to another idle mirror */
#define MAX_WORK_PER_DISK 128

/* number of r1bh/buffer_head structures pre-allocated per array so
 * that IO can still make progress even if kmalloc starts failing */
#define NR_RESERVED_BUFS 32
/*
 * The following can be used to debug the driver
 */
#if RAID1_DEBUG
#define PRINTK(x...) printk(x)
#else
#define PRINTK(x...) do { } while (0)
#endif
static mdk_personality_t raid1_personality;
static md_spinlock_t retry_list_lock = MD_SPIN_LOCK_UNLOCKED;
struct raid1_bh *raid1_retry_list = NULL, **raid1_retry_tail;
static struct buffer_head *raid1_alloc_bh(raid1_conf_t *conf, int cnt)
{
	/* return a linked list of "cnt" struct buffer_heads.
	 * don't take any off the free list unless we know we can
	 * get all we need, otherwise we could deadlock
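	 * (for example, two writers that each need 8 bh could each
	 * take 4 off the free list and then block forever waiting
	 * for the other to return its half)
	 */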
	struct buffer_head *bh = NULL;
	struct buffer_head *t;

	md_spin_lock_irq(&conf->device_lock);
	if (!conf->freebh_blocked && conf->freebh_cnt >= cnt)
		conf->freebh = t->b_next;
	md_spin_unlock_irq(&conf->device_lock);
	t = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
	PRINTK("raid1: waiting for %d bh\n", cnt);
	conf->freebh_blocked = 1;
	wait_disk_event(conf->wait_buffer,
			!conf->freebh_blocked ||
			conf->freebh_cnt > conf->raid_disks * NR_RESERVED_BUFS/2);
	conf->freebh_blocked = 0;
static inline void raid1_free_bh(raid1_conf_t *conf, struct buffer_head *bh)
{
	unsigned long flags;

	spin_lock_irqsave(&conf->device_lock, flags);
	struct buffer_head *t = bh;
	if (t->b_pprev == NULL)
		kmem_cache_free(bh_cachep, t);
	t->b_next = conf->freebh;
	spin_unlock_irqrestore(&conf->device_lock, flags);
	wake_up(&conf->wait_buffer);
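/*
 * Illustrative sketch only (not part of the original driver): the
 * intended pairing of the bh pool helpers above. raid1_alloc_bh()
 * returns a b_next-linked chain of "cnt" buffer_heads; raid1_free_bh()
 * walks that chain, putting pre-allocated heads back on conf->freebh
 * and releasing the rest to the slab cache.
 */
#if 0
static void example_bh_pool_usage(raid1_conf_t *conf)
{
	/* take three linked buffer_heads, then hand them all back */
	struct buffer_head *bhl = raid1_alloc_bh(conf, 3);

	raid1_free_bh(conf, bhl);	/* also wakes waiters in raid1_alloc_bh() */
}
#endif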
static int raid1_grow_bh(raid1_conf_t *conf, int cnt)
{
	/* allocate cnt buffer_heads, possibly less if kmalloc fails */
	struct buffer_head *bh;

	bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
	md_spin_lock_irq(&conf->device_lock);
	bh->b_pprev = &conf->freebh;
	bh->b_next = conf->freebh;
	md_spin_unlock_irq(&conf->device_lock);
static void raid1_shrink_bh(raid1_conf_t *conf)
{
	/* discard all buffer_heads */
	md_spin_lock_irq(&conf->device_lock);
	while (conf->freebh) {
		struct buffer_head *bh = conf->freebh;
		conf->freebh = bh->b_next;
		kmem_cache_free(bh_cachep, bh);
	}
	md_spin_unlock_irq(&conf->device_lock);
static struct raid1_bh *raid1_alloc_r1bh(raid1_conf_t *conf)
{
	struct raid1_bh *r1_bh = NULL;

	md_spin_lock_irq(&conf->device_lock);
	if (!conf->freer1_blocked && conf->freer1) {
		r1_bh = conf->freer1;
		conf->freer1 = r1_bh->next_r1;
		r1_bh->next_r1 = NULL;
		r1_bh->state = (1 << R1BH_PreAlloc);
		r1_bh->bh_req.b_state = 0;
	}
	md_spin_unlock_irq(&conf->device_lock);
	r1_bh = (struct raid1_bh *) kmalloc(sizeof(struct raid1_bh), GFP_NOIO);
	memset(r1_bh, 0, sizeof(*r1_bh));
	conf->freer1_blocked = 1;
	wait_disk_event(conf->wait_buffer,
			!conf->freer1_blocked ||
			conf->freer1_cnt > NR_RESERVED_BUFS/2);
	conf->freer1_blocked = 0;
static inline void raid1_free_r1bh(struct raid1_bh *r1_bh)
{
	struct buffer_head *bh = r1_bh->mirror_bh_list;
	raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);
	unsigned long flags;

	r1_bh->mirror_bh_list = NULL;
	if (test_bit(R1BH_PreAlloc, &r1_bh->state)) {
		spin_lock_irqsave(&conf->device_lock, flags);
		r1_bh->next_r1 = conf->freer1;
		conf->freer1 = r1_bh;
		spin_unlock_irqrestore(&conf->device_lock, flags);
		/* don't need to wake up wait_buffer because
		 * raid1_free_bh below will do that
		 */
	}
	raid1_free_bh(conf, bh);
static int raid1_grow_r1bh (raid1_conf_t *conf, int cnt)
{
	struct raid1_bh *r1_bh;

	r1_bh = (struct raid1_bh*)kmalloc(sizeof(*r1_bh), GFP_KERNEL);
	memset(r1_bh, 0, sizeof(*r1_bh));
	set_bit(R1BH_PreAlloc, &r1_bh->state);
	r1_bh->mddev = conf->mddev;
	raid1_free_r1bh(r1_bh);
static void raid1_shrink_r1bh(raid1_conf_t *conf)
{
	md_spin_lock_irq(&conf->device_lock);
	while (conf->freer1) {
		struct raid1_bh *r1_bh = conf->freer1;
		conf->freer1 = r1_bh->next_r1;
	}
	md_spin_unlock_irq(&conf->device_lock);
static inline void raid1_free_buf(struct raid1_bh *r1_bh)
{
	unsigned long flags;
	struct buffer_head *bh = r1_bh->mirror_bh_list;
	raid1_conf_t *conf = mddev_to_conf(r1_bh->mddev);

	r1_bh->mirror_bh_list = NULL;
	spin_lock_irqsave(&conf->device_lock, flags);
	r1_bh->next_r1 = conf->freebuf;
	conf->freebuf = r1_bh;
	spin_unlock_irqrestore(&conf->device_lock, flags);
	raid1_free_bh(conf, bh);
static struct raid1_bh *raid1_alloc_buf(raid1_conf_t *conf)
{
	struct raid1_bh *r1_bh;

	md_spin_lock_irq(&conf->device_lock);
	wait_event_lock_irq(conf->wait_buffer, conf->freebuf, conf->device_lock);
	r1_bh = conf->freebuf;
	conf->freebuf = r1_bh->next_r1;
	r1_bh->next_r1 = NULL;
	md_spin_unlock_irq(&conf->device_lock);
static int raid1_grow_buffers (raid1_conf_t *conf, int cnt)
{
	struct raid1_bh *head = NULL, **tail;
	struct raid1_bh *r1_bh;
	struct page *page;

	page = alloc_page(GFP_KERNEL);
	r1_bh = (struct raid1_bh *) kmalloc(sizeof(*r1_bh), GFP_KERNEL);
	memset(r1_bh, 0, sizeof(*r1_bh));
	r1_bh->bh_req.b_page = page;
	r1_bh->bh_req.b_data = page_address(page);
	r1_bh->next_r1 = NULL;
	tail = &r1_bh->next_r1;

	/* this lock probably isn't needed, as at the time when
	 * we are allocating buffers, nobody else will be touching the
	 * freebuf list. But it doesn't hurt....
	 */
	md_spin_lock_irq(&conf->device_lock);
	*tail = conf->freebuf;
	conf->freebuf = head;
	md_spin_unlock_irq(&conf->device_lock);
static void raid1_shrink_buffers (raid1_conf_t *conf)
{
	struct raid1_bh *head;

	md_spin_lock_irq(&conf->device_lock);
	head = conf->freebuf;
	conf->freebuf = NULL;
	md_spin_unlock_irq(&conf->device_lock);
	while (head) {
		struct raid1_bh *r1_bh = head;
		head = r1_bh->next_r1;
		__free_page(r1_bh->bh_req.b_page);
		kfree(r1_bh);
	}
static int raid1_map (mddev_t *mddev, kdev_t *rdev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	int i, disks = MD_SB_DISKS;

	/*
	 * Later we do read balancing on the read side
	 * now we use the first available disk.
	 */
	for (i = 0; i < disks; i++) {
		if (conf->mirrors[i].operational) {
			*rdev = conf->mirrors[i].dev;
			return 0;
		}
	}
	printk (KERN_ERR "raid1_map(): huh, no more operational devices?\n");
	return -1;
}
static void raid1_reschedule_retry (struct raid1_bh *r1_bh)
{
	unsigned long flags;
	mddev_t *mddev = r1_bh->mddev;
	raid1_conf_t *conf = mddev_to_conf(mddev);

	md_spin_lock_irqsave(&retry_list_lock, flags);
	if (raid1_retry_list == NULL)
		raid1_retry_tail = &raid1_retry_list;
	*raid1_retry_tail = r1_bh;
	raid1_retry_tail = &r1_bh->next_r1;
	r1_bh->next_r1 = NULL;
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
	md_wakeup_thread(conf->thread);
}
static inline void io_request_done(unsigned long sector, raid1_conf_t *conf, int phase)
{
	unsigned long flags;

	spin_lock_irqsave(&conf->segment_lock, flags);
	if (sector < conf->start_active)
		conf->cnt_done--;
	else if (sector >= conf->start_future && conf->phase == phase)
		conf->cnt_future--;
	else if (!--conf->cnt_pending)
		wake_up(&conf->wait_ready);

	spin_unlock_irqrestore(&conf->segment_lock, flags);
}
static inline void sync_request_done (unsigned long sector, raid1_conf_t *conf)
{
	unsigned long flags;

	spin_lock_irqsave(&conf->segment_lock, flags);
	if (sector >= conf->start_ready)
		--conf->cnt_ready;
	else if (sector >= conf->start_active) {
		if (!--conf->cnt_active) {
			conf->start_active = conf->start_ready;
			wake_up(&conf->wait_done);
		}
	}
	spin_unlock_irqrestore(&conf->segment_lock, flags);
}
/*
 * raid1_end_bh_io() is called when we have finished servicing a mirrored
 * operation and are ready to return a success/failure code to the buffer
 * IO.
 */
static void raid1_end_bh_io (struct raid1_bh *r1_bh, int uptodate)
{
	struct buffer_head *bh = r1_bh->master_bh;

	io_request_done(bh->b_rsector, mddev_to_conf(r1_bh->mddev),
			test_bit(R1BH_SyncPhase, &r1_bh->state));

	bh->b_end_io(bh, uptodate);
	raid1_free_r1bh(r1_bh);
}
void raid1_end_request (struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

	/*
	 * this branch is our 'one mirror IO has finished' event handler:
	 */
	if (!uptodate)
		md_error (r1_bh->mddev, bh->b_dev);
	else
		/*
		 * Set R1BH_Uptodate in our master buffer_head, so that
		 * we will return a good error code to the higher
		 * levels even if IO on some other mirrored buffer fails.
		 *
		 * The 'master' represents the complex operation to
		 * user-side. So if something waits for IO, then it will
		 * wait for the 'master' buffer_head.
		 */
		set_bit (R1BH_Uptodate, &r1_bh->state);

	/*
	 * We split up the read and write side, imho they are
	 * conceptually different.
	 */
	if ( (r1_bh->cmd == READ) || (r1_bh->cmd == READA) ) {
		/*
		 * we have only one buffer_head on the read side
		 */
		if (uptodate) {
			raid1_end_bh_io(r1_bh, uptodate);
			return;
		}
		/*
		 * oops, read error:
		 */
		printk(KERN_ERR "raid1: %s: rescheduling block %lu\n",
			partition_name(bh->b_dev), bh->b_blocknr);
		raid1_reschedule_retry(r1_bh);
		return;
	}

	/*
	 * WRITE: let's see if all mirrored write operations have
	 * finished already.
	 */
	if (atomic_dec_and_test(&r1_bh->remaining))
		raid1_end_bh_io(r1_bh, test_bit(R1BH_Uptodate, &r1_bh->state));
}
/*
 * This routine returns the disk from which the requested read should
 * be done. It keeps the last read position for every disk in the
 * array, and when a new read request comes in, the disk whose last
 * position is nearest to the request is chosen.
 *
 * TODO: now if there are 2 mirrors in the same 2 devices, performance
 * degrades dramatically because position is mirror, not device based.
 * This should be changed to be device based. Also atomic sequential
 * reads should be somehow balanced.
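 *
 * Example (hypothetical numbers): with heads last left at sector 1000
 * (mirror 0) and sector 900000 (mirror 1), a read at sector 1008 is
 * sequential for neither, but mirror 0 has the smaller distance
 * |head_position - sector| and gets picked.
 */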
static int raid1_read_balance (raid1_conf_t *conf, struct buffer_head *bh)
{
	int new_disk = conf->last_used;
	const int sectors = bh->b_size >> 9;
	const unsigned long this_sector = bh->b_rsector;
	int disk = new_disk;
	unsigned long new_distance;
	unsigned long current_distance;

	/*
	 * Check if it is sane at all to balance
	 */
	if (conf->resync_mirrors)
		goto rb_out;

	/* make sure that disk is operational */
	while (!conf->mirrors[new_disk].operational) {
		if (new_disk <= 0) new_disk = conf->raid_disks;
		new_disk--;
		if (new_disk == disk) {
			/*
			 * This means no working disk was found.
			 * Nothing much to do, let's not change anything
			 * and hope for the best...
			 */
			new_disk = conf->last_used;
			goto rb_out;
		}
	}
	disk = new_disk;
	/* now disk == new_disk == starting point for search */

	/*
	 * Don't touch anything for sequential reads.
	 */
	if (this_sector == conf->mirrors[new_disk].head_position)
		goto rb_out;

	/*
	 * If reads have been done only on a single disk
	 * for a time, let's give another disk a chance.
	 * This is for kicking those idling disks so that
	 * they would find work near some hotspot.
	 */
	if (conf->sect_count >= conf->mirrors[new_disk].sect_limit) {
		conf->sect_count = 0;

#if defined(CONFIG_SPARC64) && (__GNUC__ == 2) && (__GNUC_MINOR__ == 92)
		/* Work around a compiler bug in egcs-2.92.11 19980921 */
		new_disk = *(volatile int *)&new_disk;
#endif
		do {
			if (new_disk <= 0)
				new_disk = conf->raid_disks;
			new_disk--;
			if (new_disk == disk)
				break;
		} while ((conf->mirrors[new_disk].write_only) ||
			 (!conf->mirrors[new_disk].operational));

		goto rb_out;
	}

	current_distance = abs(this_sector -
			       conf->mirrors[disk].head_position);

	/* Find the disk which is closest */
	do {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;

		if ((conf->mirrors[disk].write_only) ||
		    (!conf->mirrors[disk].operational))
			continue;

		new_distance = abs(this_sector -
				   conf->mirrors[disk].head_position);

		if (new_distance < current_distance) {
			conf->sect_count = 0;
			current_distance = new_distance;
			new_disk = disk;
		}
	} while (disk != conf->last_used);

rb_out:
	conf->mirrors[new_disk].head_position = this_sector + sectors;

	conf->last_used = new_disk;
	conf->sect_count += sectors;

	return new_disk;
}
static int raid1_make_request (request_queue_t *q, int rw,
			       struct buffer_head * bh)
{
	mddev_t *mddev = q->queuedata;
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct buffer_head *bh_req, *bhl;
	struct raid1_bh * r1_bh;
	int disks = MD_SB_DISKS;
	int i, sum_bhs = 0;
	struct mirror_info *mirror;

	if (!buffer_locked(bh))
		BUG();

	/*
	 * make_request() can abort the operation when READA is being
	 * used and no empty request is available.
	 *
	 * Currently, just replace the command with READ/WRITE.
	 */
	if (rw == READA)
		rw = READ;

	r1_bh = raid1_alloc_r1bh (conf);

	spin_lock_irq(&conf->segment_lock);
	wait_event_lock_irq(conf->wait_done,
			bh->b_rsector < conf->start_active ||
			bh->b_rsector >= conf->start_future,
			conf->segment_lock);
	if (bh->b_rsector < conf->start_active)
		conf->cnt_done++;
	else {
		conf->cnt_future++;
		if (conf->phase)
			set_bit(R1BH_SyncPhase, &r1_bh->state);
	}
	spin_unlock_irq(&conf->segment_lock);

	/*
	 * I think the read and write branch should be separated completely,
	 * since we want to do read balancing on the read side for example.
	 * Alternative implementations? :) --mingo
	 */

	r1_bh->master_bh = bh;
	r1_bh->mddev = mddev;
	r1_bh->cmd = rw;

	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
		mirror = conf->mirrors + raid1_read_balance(conf, bh);

		bh_req = &r1_bh->bh_req;
		memcpy(bh_req, bh, sizeof(*bh));
		bh_req->b_blocknr = bh->b_rsector;
		bh_req->b_dev = mirror->dev;
		bh_req->b_rdev = mirror->dev;
	/*	bh_req->b_rsector = bh->n_rsector; */
		bh_req->b_end_io = raid1_end_request;
		bh_req->b_private = r1_bh;
		generic_make_request (rw, bh_req);
		return 0;
	}
	/*
	 * WRITE:
	 */
	bhl = raid1_alloc_bh(conf, conf->raid_disks);
	for (i = 0; i < disks; i++) {
		struct buffer_head *mbh;
		if (!conf->mirrors[i].operational)
			continue;

		/*
		 * We should use a private pool (size depending on NR_REQUEST),
		 * to avoid writes filling up the memory with bhs
		 *
		 * Such pools are much faster than kmalloc anyways (so we waste
		 * almost nothing by not using the master bh when writing and
		 * win a lot of cleanness) but for now we are cool enough. --mingo
		 *
		 * It's safe to sleep here, buffer heads cannot be used in a shared
		 * manner in the write branch. Look how we lock the buffer at the
		 * beginning of this function to grok the difference ;)
		 */
		mbh->b_this_page = (struct buffer_head *)1;

		/*
		 * prepare mirrored mbh (fields ordered for max mem throughput):
		 */
		mbh->b_blocknr    = bh->b_rsector;
		mbh->b_dev        = conf->mirrors[i].dev;
		mbh->b_rdev       = conf->mirrors[i].dev;
		mbh->b_rsector    = bh->b_rsector;
		mbh->b_state      = (1<<BH_Req) | (1<<BH_Dirty) |
				    (1<<BH_Mapped) | (1<<BH_Lock);
		atomic_set(&mbh->b_count, 1);
		mbh->b_size       = bh->b_size;
		mbh->b_page       = bh->b_page;
		mbh->b_data       = bh->b_data;
		mbh->b_list       = BUF_LOCKED;
		mbh->b_end_io     = raid1_end_request;
		mbh->b_private    = r1_bh;

		mbh->b_next = r1_bh->mirror_bh_list;
		r1_bh->mirror_bh_list = mbh;
		sum_bhs++;
	}
	if (bhl) raid1_free_bh(conf,bhl);
	if (!sum_bhs) {
		/* Gag - all mirrors non-operational.. */
		raid1_end_bh_io(r1_bh, 0);
		return 0;
	}
	md_atomic_set(&r1_bh->remaining, sum_bhs);
	/*
	 * We have to be a bit careful about the semaphore above, that's
	 * why we start the requests separately. Since kmalloc() could
	 * fail, sleep and make_request() can sleep too, this is the
	 * safer solution. Imagine, end_request decreasing the semaphore
	 * before we could have set it up ... We could play tricks with
	 * the semaphore (presetting it and correcting at the end if
	 * sum_bhs is not 'n') but we have to do end_request by hand if
	 * all requests finish before we have had a chance to set up the
	 * semaphore correctly ... lots of races.
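	 *
	 * (Concretely: with two mirrors, if ->remaining were preset to 1
	 * and bumped as each mbh went out, a fast end_request on the
	 * first mirror could see it drop to zero and complete the master
	 * bh before the second mirror request was even submitted.)
	 */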
	bh = r1_bh->mirror_bh_list;
	while (bh) {
		struct buffer_head *bh2 = bh;
		bh = bh->b_next;
		generic_make_request(rw, bh2);
	}
	return (0);
}
static void raid1_status(struct seq_file *seq, mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	int i;

	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
		   conf->working_disks);
	for (i = 0; i < conf->raid_disks; i++)
		seq_printf(seq, "%s",
			   conf->mirrors[i].operational ? "U" : "_");
	seq_printf(seq, "]");
}
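/*
 * Illustrative output of the above: " [2/1] [U_]" would mean a 2-disk
 * set with 1 working mirror -- one 'U' (up) or '_' (failed) per disk.
 */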
#define LAST_DISK KERN_ALERT \
"raid1: only one disk left and IO error.\n"

#define NO_SPARE_DISK KERN_ALERT \
"raid1: no spare disk left, degrading mirror level by one.\n"

#define DISK_FAILED KERN_ALERT \
"raid1: Disk failure on %s, disabling device.\n" \
"	Operation continuing on %d devices\n"

#define START_SYNCING KERN_ALERT \
"raid1: start syncing spare disk.\n"

#define ALREADY_SYNCING KERN_INFO \
"raid1: syncing already in progress.\n"
static void mark_disk_bad (mddev_t *mddev, int failed)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info *mirror = conf->mirrors+failed;
	mdp_super_t *sb = mddev->sb;

	mirror->operational = 0;
	mark_disk_faulty(sb->disks+mirror->number);
	mark_disk_nonsync(sb->disks+mirror->number);
	mark_disk_inactive(sb->disks+mirror->number);
	if (!mirror->write_only)
		sb->active_disks--;
	sb->working_disks--;
	sb->failed_disks++;
	mddev->sb_dirty = 1;
	md_wakeup_thread(conf->thread);
	if (!mirror->write_only)
		conf->working_disks--;
	printk (DISK_FAILED, partition_name (mirror->dev),
		conf->working_disks);
}
static int raid1_error (mddev_t *mddev, kdev_t dev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info * mirrors = conf->mirrors;
	int disks = MD_SB_DISKS;
	int i;

	/*
	 * If it is not operational, then we have already marked it as dead
	 * else if it is the last working disk, ignore the error, let the
	 * next level up know.
	 * else mark the drive as failed
	 */
	for (i = 0; i < disks; i++)
		if (mirrors[i].dev == dev && mirrors[i].operational)
			break;

	if (i < conf->raid_disks && conf->working_disks == 1) {
		/* Don't fail the drive, act as though we were just a
		 * normal single drive
		 */
		return 1;
	}
	mark_disk_bad(mddev, i);
	return 0;
}
static void print_raid1_conf (raid1_conf_t *conf)
{
	int i;
	struct mirror_info *tmp;

	printk("RAID1 conf printout:\n");
	if (!conf) {
		printk("(conf==NULL)\n");
		return;
	}
	printk(" --- wd:%d rd:%d nd:%d\n", conf->working_disks,
	       conf->raid_disks, conf->nr_disks);

	for (i = 0; i < MD_SB_DISKS; i++) {
		tmp = conf->mirrors + i;
		printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n",
		       i, tmp->spare, tmp->operational,
		       tmp->number, tmp->raid_disk, tmp->used_slot,
		       partition_name(tmp->dev));
	}
}
static void close_sync(raid1_conf_t *conf)
{
	mddev_t *mddev = conf->mddev;
	/* If reconstruction was interrupted, we need to close the "active" and "pending"
	 * holes.
	 * we know that there are no active rebuild requests, so cnt_active == cnt_ready == 0
	 */
	/* this is really needed when recovery stops too... */
	spin_lock_irq(&conf->segment_lock);
	conf->start_active = conf->start_pending;
	conf->start_ready = conf->start_pending;
	wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending, conf->segment_lock);
	conf->start_active = conf->start_ready = conf->start_pending = conf->start_future;
	conf->start_future = (mddev->sb->size<<1)+1;
	conf->cnt_pending = conf->cnt_future;
	conf->cnt_future = 0;
	conf->phase = conf->phase ^ 1;
	wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending, conf->segment_lock);
	conf->start_active = conf->start_ready = conf->start_pending = conf->start_future = 0;
	conf->phase = 0;
	conf->cnt_future = conf->cnt_done;
	conf->cnt_done = 0;
	spin_unlock_irq(&conf->segment_lock);
	wake_up(&conf->wait_done);
}
static int raid1_diskop(mddev_t *mddev, mdp_disk_t **d, int state)
{
	int err = 0;
	int i, failed_disk=-1, spare_disk=-1, removed_disk=-1, added_disk=-1;
	raid1_conf_t *conf = mddev->private;
	struct mirror_info *tmp, *sdisk, *fdisk, *rdisk, *adisk;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *failed_desc, *spare_desc, *added_desc;
	mdk_rdev_t *spare_rdev, *failed_rdev;

	print_raid1_conf(conf);
	switch (state) {
	case DISKOP_SPARE_ACTIVE:
	case DISKOP_SPARE_INACTIVE:
		/* need to wait for pending sync io before locking device */
		close_sync(conf);
	}

	md_spin_lock_irq(&conf->device_lock);
	switch (state) {
	case DISKOP_SPARE_ACTIVE:

		/*
		 * Find the failed disk within the RAID1 configuration ...
		 * (this can only be in the first conf->working_disks part)
		 */
		for (i = 0; i < conf->raid_disks; i++) {
			tmp = conf->mirrors + i;
			if ((!tmp->operational && !tmp->spare) ||
			    !tmp->used_slot) {
				failed_disk = i;
				break;
			}
		}
		/*
		 * When we activate a spare disk we _must_ have a disk in
		 * the lower (active) part of the array to replace.
		 */
		if ((failed_disk == -1) || (failed_disk >= conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;
	case DISKOP_SPARE_WRITE:
	case DISKOP_SPARE_INACTIVE:

		/*
		 * Find the spare disk ... (can only be in the 'high'
		 * area of the array)
		 */
		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (tmp->spare && tmp->number == (*d)->number) {
				spare_disk = i;
				break;
			}
		}
		if (spare_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;
	case DISKOP_HOT_REMOVE_DISK:

		for (i = 0; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (tmp->used_slot && (tmp->number == (*d)->number)) {
				if (tmp->operational) {
					err = -EBUSY;
					goto abort;
				}
				removed_disk = i;
				break;
			}
		}
		if (removed_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;
	case DISKOP_HOT_ADD_DISK:

		for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
			tmp = conf->mirrors + i;
			if (!tmp->used_slot) {
				added_disk = i;
				break;
			}
		}
		if (added_disk == -1) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		break;
	}
	switch (state) {
	/*
	 * Switch the spare disk to write-only mode:
	 */
	case DISKOP_SPARE_WRITE:
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 1;
		sdisk->write_only = 1;
		break;
	/*
	 * Deactivate a spare disk:
	 */
	case DISKOP_SPARE_INACTIVE:
		if (conf->start_future > 0) {
			MD_BUG();
			err = -EBUSY;
			break;
		}
		sdisk = conf->mirrors + spare_disk;
		sdisk->operational = 0;
		sdisk->write_only = 0;
		break;
	/*
	 * Activate (mark read-write) the (now sync) spare disk,
	 * which means we switch its 'raid position' (->raid_disk)
	 * with the failed disk. (only the first 'conf->nr_disks'
	 * slots are used for 'real' disks and we must preserve this
	 * property)
	 */
	case DISKOP_SPARE_ACTIVE:
		if (conf->start_future > 0) {
			MD_BUG();
			err = -EBUSY;
			break;
		}
		sdisk = conf->mirrors + spare_disk;
		fdisk = conf->mirrors + failed_disk;

		spare_desc = &sb->disks[sdisk->number];
		failed_desc = &sb->disks[fdisk->number];

		if (spare_desc != *d) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (spare_desc->raid_disk != sdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (sdisk->raid_disk != spare_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (failed_desc->raid_disk != fdisk->raid_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		if (fdisk->raid_disk != failed_disk) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		/*
		 * do the switch finally
		 */
		spare_rdev = find_rdev_nr(mddev, spare_desc->number);
		failed_rdev = find_rdev_nr(mddev, failed_desc->number);

		/* There must be a spare_rdev, but there may not be a
		 * failed_rdev. That slot might be empty...
		 */
		spare_rdev->desc_nr = failed_desc->number;
		if (failed_rdev)
			failed_rdev->desc_nr = spare_desc->number;

		xchg_values(*spare_desc, *failed_desc);
		xchg_values(*fdisk, *sdisk);
		/*
		 * (careful, 'failed' and 'spare' are switched from now on)
		 *
		 * we want to preserve linear numbering and we want to
		 * give the proper raid_disk number to the now activated
		 * disk. (this means we switch back these values)
		 */

		xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
		xchg_values(sdisk->raid_disk, fdisk->raid_disk);
		xchg_values(spare_desc->number, failed_desc->number);
		xchg_values(sdisk->number, fdisk->number);

		*d = failed_desc;

		if (sdisk->dev == MKDEV(0,0))
			sdisk->used_slot = 0;

		/*
		 * this really activates the spare.
		 */
		fdisk->spare = 0;
		fdisk->write_only = 0;

		/*
		 * if we activate a spare, we definitely replace a
		 * non-operational disk slot in the 'low' area of
		 * the array.
		 */
		conf->working_disks++;
		break;
	case DISKOP_HOT_REMOVE_DISK:
		rdisk = conf->mirrors + removed_disk;

		if (rdisk->spare && (removed_disk < conf->raid_disks)) {
			MD_BUG();
			err = 1;
			goto abort;
		}
		rdisk->dev = MKDEV(0,0);
		rdisk->used_slot = 0;
		conf->nr_disks--;
		break;
	case DISKOP_HOT_ADD_DISK:
		adisk = conf->mirrors + added_disk;
		added_desc = *d;

		if (added_disk != added_desc->number) {
			MD_BUG();
			err = 1;
			goto abort;
		}

		adisk->number = added_desc->number;
		adisk->raid_disk = added_desc->raid_disk;
		adisk->dev = MKDEV(added_desc->major, added_desc->minor);

		adisk->operational = 0;
		adisk->write_only = 0;
		adisk->spare = 1;
		adisk->used_slot = 1;
		adisk->head_position = 0;
		conf->nr_disks++;
		break;

	default:
		MD_BUG();
		err = 1;
		goto abort;
	}
abort:
	md_spin_unlock_irq(&conf->device_lock);
	if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE)
		/* should move to "END_REBUILD" when such exists */
		raid1_shrink_buffers(conf);

	print_raid1_conf(conf);
	return err;
}
#define IO_ERROR KERN_ALERT \
"raid1: %s: unrecoverable I/O read error for block %lu\n"

#define REDIRECT_SECTOR KERN_ERR \
"raid1: %s: redirecting sector %lu to another mirror\n"
/*
 * This is a kernel thread which:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 */
static void end_sync_write(struct buffer_head *bh, int uptodate);
static void end_sync_read(struct buffer_head *bh, int uptodate);
static void raid1d (void *data)
{
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	unsigned long flags;
	raid1_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;
	kdev_t dev;

	if (mddev->sb_dirty)
		md_update_sb(mddev);

	for (;;) {
		md_spin_lock_irqsave(&retry_list_lock, flags);
		r1_bh = raid1_retry_list;
		if (!r1_bh)
			break;
		raid1_retry_list = r1_bh->next_r1;
		md_spin_unlock_irqrestore(&retry_list_lock, flags);

		mddev = r1_bh->mddev;
		bh = &r1_bh->bh_req;
		switch(r1_bh->cmd) {
		case SPECIAL:
			/* have to allocate lots of bh structures and
			 * schedule writes
			 */
			if (test_bit(R1BH_Uptodate, &r1_bh->state)) {
				int i, sum_bhs = 0;
				int disks = MD_SB_DISKS;
				struct buffer_head *bhl, *mbh;

				conf = mddev_to_conf(mddev);
				bhl = raid1_alloc_bh(conf, conf->raid_disks); /* don't really need this many */
				for (i = 0; i < disks ; i++) {
					if (!conf->mirrors[i].operational)
						continue;
					if (i == conf->last_used)
						/* we read from here, no need to write */
						continue;
					if (i < conf->raid_disks
					    && !conf->resync_mirrors)
						/* don't need to write this,
						 * we are just rebuilding */
						continue;
					mbh->b_this_page = (struct buffer_head *)1;

					/*
					 * prepare mirrored bh (fields ordered for max mem throughput):
					 */
					mbh->b_blocknr    = bh->b_blocknr;
					mbh->b_dev        = conf->mirrors[i].dev;
					mbh->b_rdev       = conf->mirrors[i].dev;
					mbh->b_rsector    = bh->b_blocknr;
					mbh->b_state      = (1<<BH_Req) | (1<<BH_Dirty) |
							    (1<<BH_Mapped) | (1<<BH_Lock);
					atomic_set(&mbh->b_count, 1);
					mbh->b_size       = bh->b_size;
					mbh->b_page       = bh->b_page;
					mbh->b_data       = bh->b_data;
					mbh->b_list       = BUF_LOCKED;
					mbh->b_end_io     = end_sync_write;
					mbh->b_private    = r1_bh;

					mbh->b_next = r1_bh->mirror_bh_list;
					r1_bh->mirror_bh_list = mbh;
					sum_bhs++;
				}
				md_atomic_set(&r1_bh->remaining, sum_bhs);
				if (bhl) raid1_free_bh(conf, bhl);
				mbh = r1_bh->mirror_bh_list;

				if (!sum_bhs) {
					/* nowhere to write this to... I guess we
					 * must be done
					 */
					sync_request_done(bh->b_blocknr, conf);
					md_done_sync(mddev, bh->b_size>>9, 0);
					raid1_free_buf(r1_bh);
				} else while (mbh) {
					struct buffer_head *bh1 = mbh;
					mbh = mbh->b_next;
					generic_make_request(WRITE, bh1);
					md_sync_acct(bh1->b_dev, bh1->b_size/512);
				}
			} else {
				/* There is no point trying a read-for-reconstruct
				 * as reconstruct is about to be aborted
				 */
				printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
				md_done_sync(mddev, bh->b_size>>9, 0);
			}
			break;
		case READ:
		case READA:
			dev = bh->b_dev;
			raid1_map (mddev, &bh->b_dev);
			if (bh->b_dev == dev) {
				printk (IO_ERROR, partition_name(bh->b_dev), bh->b_blocknr);
				raid1_end_bh_io(r1_bh, 0);
			} else {
				printk (REDIRECT_SECTOR,
					partition_name(bh->b_dev), bh->b_blocknr);
				bh->b_rdev = bh->b_dev;
				bh->b_rsector = bh->b_blocknr;
				generic_make_request (r1_bh->cmd, bh);
			}
			break;
		}
	}
	md_spin_unlock_irqrestore(&retry_list_lock, flags);
}

#undef IO_ERROR
#undef REDIRECT_SECTOR
/*
 * Private kernel thread to reconstruct mirrors after an unclean
 * shutdown.
 */
static void raid1syncd (void *data)
{
	raid1_conf_t *conf = data;
	mddev_t *mddev = conf->mddev;

	if (!conf->resync_mirrors)
		return;
	if (mddev->recovery_running != 2)
		return;
	if (!md_do_sync(mddev, NULL)) {
		/*
		 * Only if everything went Ok.
		 */
		conf->resync_mirrors = 0;
	}
/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflict with active sync requests.
 * This is achieved by conceptually dividing the device space into a
 * number of sections:
 *  DONE:    0 .. a-1    These blocks are in-sync
 *  ACTIVE:  a .. b-1    These blocks may have active sync requests, but
 *                       no normal IO requests
 *  READY:   b .. c-1    These blocks have no normal IO requests - sync
 *                       request may be happening
 *  PENDING: c .. d-1    These blocks may have IO requests, but no new
 *                       ones will be added
 *  FUTURE:  d .. end    These blocks are not to be considered yet. IO may
 *                       be happening, but not sync
 *
 * We keep a phase which flips (0 or 1) each time d moves, and
 * a count of:
 *  z = active io requests in FUTURE since d moved - marked with
 *      current phase
 *  y = active io requests in FUTURE before d moved, or PENDING -
 *      marked with previous phase
 *  x = active sync requests in READY
 *  w = active sync requests in ACTIVE
 *  v = active io requests in DONE
 *
 * Normally, a=b=c=d=0 and z = active io requests
 *   or a=b=c=d=END and v = active io requests
 * Allowed changes to a,b,c,d:
 * A:  c==d && y==0 -> d+=window, y=z, z=0, phase=!phase
 * B:  y==0 -> c=d
 * C:  b=c, w+=x, x=0
 * D:  w==0 -> a=b
 * E: a==b==c==d==end -> a=b=c=d=0, z=v, v=0
 *
 * At start of sync we apply A.
 * When y reaches 0, we apply B then A then begin sync requests.
 * When the sync point reaches c-1, we wait for y==0 and w==0, and
 * then apply B then A then D then C.
 * Finally, we apply E.
 *
 * The sync request simply issues a "read" against a working drive.
 * This is marked so that on completion the raid1d thread is woken to
 * issue suitable write requests.
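 *
 * A concrete walk-through (hypothetical numbers): with window=32 and
 * a=b=c=d=0, transition A moves d to 32, so sectors 0..31 become
 * PENDING while the IO already outstanding there drains as y. Once
 * y==0, B (c=d) marks 0..31 READY and A opens the next window; D and
 * C then retire the ACTIVE region to DONE and promote READY to ACTIVE
 * as the sync requests on it complete.
 */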
static int raid1_sync_request (mddev_t *mddev, unsigned long sector_nr)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);
	struct mirror_info *mirror;
	struct raid1_bh *r1_bh;
	struct buffer_head *bh;
	int disk;
	int block_nr;
	int buffs;
	int bsize;

	if (!sector_nr) {
		/* we want enough buffers to hold twice the window of 128 */
		buffs = 128 * 2 / (PAGE_SIZE>>9);
		buffs = raid1_grow_buffers(conf, buffs);
		if (buffs < 2)
			goto nomem;
		conf->window = buffs*(PAGE_SIZE>>9)/2;
	}
	spin_lock_irq(&conf->segment_lock);
	if (!sector_nr) {
		conf->start_active = 0;
		conf->start_ready = 0;
		conf->start_pending = 0;
		conf->start_future = 0;
		conf->phase = 0;
		conf->cnt_future += conf->cnt_done+conf->cnt_pending;
		conf->cnt_done = conf->cnt_pending = 0;
		if (conf->cnt_ready || conf->cnt_active)
			MD_BUG();
	}
	while (sector_nr >= conf->start_pending) {
		PRINTK("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n",
			sector_nr, conf->start_active, conf->start_ready, conf->start_pending, conf->start_future,
			conf->cnt_done, conf->cnt_active, conf->cnt_ready, conf->cnt_pending, conf->cnt_future);
		wait_event_lock_irq(conf->wait_done,
				    !conf->cnt_active,
				    conf->segment_lock);
		wait_event_lock_irq(conf->wait_ready,
				    !conf->cnt_pending,
				    conf->segment_lock);
		conf->start_active = conf->start_ready;
		conf->start_ready = conf->start_pending;
		conf->start_pending = conf->start_future;
		conf->start_future = conf->start_future+conf->window;
		// Note: falling off the end is not a problem
		conf->phase = conf->phase ^ 1;
		conf->cnt_active = conf->cnt_ready;
		conf->cnt_ready = 0;
		conf->cnt_pending = conf->cnt_future;
		conf->cnt_future = 0;
		wake_up(&conf->wait_done);
	}
	conf->cnt_ready++;
	spin_unlock_irq(&conf->segment_lock);
	/* If reconstructing, and >1 working disk,
	 * could dedicate one to rebuild and others to
	 * service read requests ..
	 */
	disk = conf->last_used;
	/* make sure disk is operational */
	while (!conf->mirrors[disk].operational) {
		if (disk <= 0) disk = conf->raid_disks;
		disk--;
		if (disk == conf->last_used)
			break;
	}
	conf->last_used = disk;

	mirror = conf->mirrors + conf->last_used;

	r1_bh = raid1_alloc_buf (conf);
	r1_bh->master_bh = NULL;
	r1_bh->mddev = mddev;
	r1_bh->cmd = SPECIAL;
	bh = &r1_bh->bh_req;

	block_nr = sector_nr;
	bsize = 512;
	while (!(block_nr & 1) && bsize < PAGE_SIZE
			&& (block_nr+2)*(bsize>>9) < (mddev->sb->size *2)) {
		block_nr >>= 1;
		bsize <<= 1;
	}
	bh->b_size = bsize;
	bh->b_list = BUF_LOCKED;
	bh->b_dev = mirror->dev;
	bh->b_rdev = mirror->dev;
	bh->b_state = (1<<BH_Req) | (1<<BH_Mapped) | (1<<BH_Lock);
	if (bh->b_data != page_address(bh->b_page))
		BUG();
	bh->b_end_io = end_sync_read;
	bh->b_private = r1_bh;
	bh->b_blocknr = sector_nr;
	bh->b_rsector = sector_nr;
	init_waitqueue_head(&bh->b_wait);

	generic_make_request(READ, bh);
	md_sync_acct(bh->b_dev, bh->b_size/512);

	return (bsize >> 9);
nomem:
	raid1_shrink_buffers(conf);
	return -ENOMEM;
}
static void end_sync_read(struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

	/* we have read a block, now it needs to be re-written,
	 * or re-read if the read failed.
	 * We don't do much here, just schedule handling by raid1d
	 */
	if (!uptodate)
		md_error (r1_bh->mddev, bh->b_dev);
	else
		set_bit(R1BH_Uptodate, &r1_bh->state);
	raid1_reschedule_retry(r1_bh);
}
static void end_sync_write(struct buffer_head *bh, int uptodate)
{
	struct raid1_bh * r1_bh = (struct raid1_bh *)(bh->b_private);

	if (!uptodate)
		md_error (r1_bh->mddev, bh->b_dev);
	if (atomic_dec_and_test(&r1_bh->remaining)) {
		mddev_t *mddev = r1_bh->mddev;
		unsigned long sect = bh->b_blocknr;
		int size = bh->b_size;

		raid1_free_buf(r1_bh);
		sync_request_done(sect, mddev_to_conf(mddev));
		md_done_sync(mddev, size>>9, uptodate);
	}
}
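/*
 * Summary of the resync data flow implemented above: raid1_sync_request
 * issues a READ that completes in end_sync_read; raid1d's SPECIAL case
 * then fans the block out as WRITEs to the other mirrors, and the last
 * end_sync_write completion reports back through sync_request_done and
 * md_done_sync.
 */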
#define INVALID_LEVEL KERN_WARNING \
"raid1: md%d: raid level not set to mirroring (%d)\n"

#define NO_SB KERN_ERR \
"raid1: disabled mirror %s (couldn't access raid superblock)\n"

#define ERRORS KERN_ERR \
"raid1: disabled mirror %s (errors detected)\n"

#define NOT_IN_SYNC KERN_ERR \
"raid1: disabled mirror %s (not in sync)\n"

#define INCONSISTENT KERN_ERR \
"raid1: disabled mirror %s (inconsistent descriptor)\n"

#define ALREADY_RUNNING KERN_ERR \
"raid1: disabled mirror %s (mirror %d already operational)\n"

#define OPERATIONAL KERN_INFO \
"raid1: device %s operational as mirror %d\n"

#define MEM_ERROR KERN_ERR \
"raid1: couldn't allocate memory for md%d\n"

#define SPARE KERN_INFO \
"raid1: spare disk %s\n"

#define NONE_OPERATIONAL KERN_ERR \
"raid1: no operational mirrors for md%d\n"

#define ARRAY_IS_ACTIVE KERN_INFO \
"raid1: raid set md%d active with %d out of %d mirrors\n"

#define THREAD_ERROR KERN_ERR \
"raid1: couldn't allocate thread for md%d\n"

#define START_RESYNC KERN_WARNING \
"raid1: raid set md%d not clean; reconstructing mirrors\n"
static int raid1_run (mddev_t *mddev)
{
	raid1_conf_t *conf;
	int i, j, disk_idx;
	struct mirror_info *disk;
	mdp_super_t *sb = mddev->sb;
	mdp_disk_t *descriptor;
	mdk_rdev_t *rdev;
	struct md_list_head *tmp;
	int start_recovery = 0;

	if (sb->level != 1) {
		printk(INVALID_LEVEL, mdidx(mddev), sb->level);
		goto out;
	}
	/*
	 * copy the already verified devices into our private RAID1
	 * bookkeeping area. [whatever we allocate in raid1_run(),
	 * should be freed in raid1_stop()]
	 */
	conf = kmalloc(sizeof(raid1_conf_t), GFP_KERNEL);
	mddev->private = conf;
	if (!conf) {
		printk(MEM_ERROR, mdidx(mddev));
		goto out;
	}
	memset(conf, 0, sizeof(*conf));
	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->faulty) {
			printk(ERRORS, partition_name(rdev->dev));
			continue;
		}
		if (rdev->desc_nr == -1) {
			MD_BUG();
			continue;
		}
		descriptor = &sb->disks[rdev->desc_nr];
		disk_idx = descriptor->raid_disk;
		disk = conf->mirrors + disk_idx;

		if (disk_faulty(descriptor)) {
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = rdev->dev;
			disk->sect_limit = MAX_WORK_PER_DISK;
			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			disk->head_position = 0;
			continue;
		}
		if (disk_active(descriptor)) {
			if (!disk_sync(descriptor)) {
				printk(NOT_IN_SYNC,
					partition_name(rdev->dev));
				continue;
			}
			if ((descriptor->number > MD_SB_DISKS) ||
			    (disk_idx > sb->raid_disks)) {
				printk(INCONSISTENT,
					partition_name(rdev->dev));
				continue;
			}
			if (disk->operational) {
				printk(ALREADY_RUNNING,
					partition_name(rdev->dev),
					disk_idx);
				continue;
			}
			printk(OPERATIONAL, partition_name(rdev->dev),
				disk_idx);
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = rdev->dev;
			disk->sect_limit = MAX_WORK_PER_DISK;
			disk->operational = 1;
			disk->write_only = 0;
			disk->spare = 0;
			disk->used_slot = 1;
			disk->head_position = 0;
			conf->working_disks++;
		} else {
			/*
			 * Must be a spare disk ..
			 */
			printk(SPARE, partition_name(rdev->dev));
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = rdev->dev;
			disk->sect_limit = MAX_WORK_PER_DISK;
			disk->operational = 0;
			disk->write_only = 0;
			disk->spare = 1;
			disk->used_slot = 1;
			disk->head_position = 0;
		}
	}
	conf->raid_disks = sb->raid_disks;
	conf->nr_disks = sb->nr_disks;
	conf->mddev = mddev;
	conf->device_lock = MD_SPIN_LOCK_UNLOCKED;

	conf->segment_lock = MD_SPIN_LOCK_UNLOCKED;
	init_waitqueue_head(&conf->wait_buffer);
	init_waitqueue_head(&conf->wait_done);
	init_waitqueue_head(&conf->wait_ready);
	if (!conf->working_disks) {
		printk(NONE_OPERATIONAL, mdidx(mddev));
		goto out_free_conf;
	}

	/* pre-allocate some buffer_head structures.
	 * As a minimum, 1 r1bh and raid_disks buffer_heads
	 * would probably get us by in tight memory situations,
	 * but a few more is probably a good idea.
	 * For now, try NR_RESERVED_BUFS r1bh and
	 * NR_RESERVED_BUFS*raid_disks bufferheads
	 * This will allow at least NR_RESERVED_BUFS concurrent
	 * reads or writes even if kmalloc starts failing
	 */
	if (raid1_grow_r1bh(conf, NR_RESERVED_BUFS) < NR_RESERVED_BUFS ||
	    raid1_grow_bh(conf, NR_RESERVED_BUFS*conf->raid_disks)
			< NR_RESERVED_BUFS*conf->raid_disks) {
		printk(MEM_ERROR, mdidx(mddev));
		goto out_free_conf;
	}
	for (i = 0; i < MD_SB_DISKS; i++) {
		descriptor = sb->disks+i;
		disk_idx = descriptor->raid_disk;
		disk = conf->mirrors + disk_idx;

		if (disk_faulty(descriptor) && (disk_idx < conf->raid_disks) &&
		    !disk->used_slot) {
			disk->number = descriptor->number;
			disk->raid_disk = disk_idx;
			disk->dev = MKDEV(0,0);
			disk->operational = 0;
			disk->write_only = 0;
			disk->used_slot = 1;
			disk->head_position = 0;
		}
	}

	/*
	 * find the first working one and use it as a starting point
	 * for read balancing.
	 */
	for (j = 0; !conf->mirrors[j].operational && j < MD_SB_DISKS; j++)
		/* nothing */;
	conf->last_used = j;
	{
		const char * name = "raid1d";

		conf->thread = md_register_thread(raid1d, conf, name);
		if (!conf->thread) {
			printk(THREAD_ERROR, mdidx(mddev));
			goto out_free_conf;
		}
	}

	if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN)) &&
	    (conf->working_disks > 1)) {
		const char * name = "raid1syncd";

		conf->resync_thread = md_register_thread(raid1syncd, conf, name);
		if (!conf->resync_thread) {
			printk(THREAD_ERROR, mdidx(mddev));
			goto out_free_conf;
		}

		printk(START_RESYNC, mdidx(mddev));
		conf->resync_mirrors = 1;
		mddev->recovery_running = 2;
		md_wakeup_thread(conf->resync_thread);
	}
	/*
	 * Regenerate the "device is in sync with the raid set" bit for
	 * each device.
	 */
	for (i = 0; i < MD_SB_DISKS; i++) {
		mark_disk_nonsync(sb->disks+i);
		for (j = 0; j < sb->raid_disks; j++) {
			if (!conf->mirrors[j].operational)
				continue;
			if (sb->disks[i].number == conf->mirrors[j].number)
				mark_disk_sync(sb->disks+i);
		}
	}
	sb->active_disks = conf->working_disks;

	printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks);
	/*
	 * Ok, everything is just fine now
	 */
	return 0;

out_free_conf:
	raid1_shrink_r1bh(conf);
	raid1_shrink_bh(conf);
	raid1_shrink_buffers(conf);
	kfree(conf);
	mddev->private = NULL;
out:
	return -EIO;
}
#undef INVALID_LEVEL
#undef ALREADY_RUNNING
#undef NONE_OPERATIONAL
#undef ARRAY_IS_ACTIVE
static int raid1_stop_resync (mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);

	if (conf->resync_thread) {
		if (conf->resync_mirrors) {
			md_interrupt_thread(conf->resync_thread);
			printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n");
			return 1;
		}
		return 0;
	}
	return 0;
}
static int raid1_restart_resync (mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);

	if (conf->resync_mirrors) {
		if (!conf->resync_thread) {
			MD_BUG();
			return 0;
		}
		mddev->recovery_running = 2;
		md_wakeup_thread(conf->resync_thread);
		return 1;
	}
	return 0;
}
static int raid1_stop (mddev_t *mddev)
{
	raid1_conf_t *conf = mddev_to_conf(mddev);

	md_unregister_thread(conf->thread);
	if (conf->resync_thread)
		md_unregister_thread(conf->resync_thread);
	raid1_shrink_r1bh(conf);
	raid1_shrink_bh(conf);
	raid1_shrink_buffers(conf);
	kfree(conf);
	mddev->private = NULL;
	return 0;
}
static mdk_personality_t raid1_personality =
{
	name:		"raid1",
	make_request:	raid1_make_request,
	run:		raid1_run,
	stop:		raid1_stop,
	status:		raid1_status,
	error_handler:	raid1_error,
	diskop:		raid1_diskop,
	stop_resync:	raid1_stop_resync,
	restart_resync:	raid1_restart_resync,
	sync_request:	raid1_sync_request
};
static int md__init raid1_init (void)
{
	return register_md_personality (RAID1, &raid1_personality);
}

static void raid1_exit (void)
{
	unregister_md_personality (RAID1);
}

module_init(raid1_init);
module_exit(raid1_exit);
MODULE_LICENSE("GPL");