]> git.neil.brown.name Git - mdadm.git/blob - Assemble.c
Release mdadm-4.0
[mdadm.git] / Assemble.c
1 /*
2  * mdadm - manage Linux "md" devices aka RAID arrays.
3  *
4  * Copyright (C) 2001-2016 Neil Brown <neilb@suse.com>
5  *
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2 of the License, or
10  *    (at your option) any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  *    Author: Neil Brown
22  *    Email: <neilb@suse.de>
23  */
24
25 #include        "mdadm.h"
26 #include        <ctype.h>
27
static int name_matches(char *found, char *required, char *homehost, int require_homehost)
{
        /* Decide whether the name 'found' satisfies the name 'required'.
         * An exact match always succeeds.  Otherwise 'found' must look
         * like "host:name" where "name" equals 'required' and the "host"
         * part is acceptable, i.e. at least one of:
         *  - no homehost is required,
         *  - 'found' starts with "any:",
         *  - 'homehost' is "any",
         *  - the host part is exactly 'homehost'.
         * Returns 1 on a match and 0 otherwise.
         */
        char *colon;
        unsigned int hostlen;

        if (strcmp(found, required) == 0)
                return 1;

        colon = strchr(found, ':');
        if (colon == NULL)
                return 0;

        /* Whatever the host part is, the name after it has to match. */
        if (strcmp(colon + 1, required) != 0)
                return 0;

        if (!require_homehost)
                return 1;
        if (strncmp(found, "any:", 4) == 0)
                return 1;
        if (homehost == NULL)
                return 0;
        if (strcmp(homehost, "any") == 0)
                return 1;

        /* The host prefix must be 'homehost' in full, not just share
         * a leading substring with it.
         */
        hostlen = colon - found;
        return strlen(homehost) == hostlen &&
               strncmp(found, homehost, hostlen) == 0;
}
53
54 static int is_member_busy(char *metadata_version)
55 {
56         /* check if the given member array is active */
57         struct mdstat_ent *mdstat = mdstat_read(0, 0);
58         struct mdstat_ent *ent;
59         int busy = 0;
60
61         for (ent = mdstat; ent; ent = ent->next) {
62                 if (ent->metadata_version == NULL)
63                         continue;
64                 if (strncmp(ent->metadata_version, "external:", 9) != 0)
65                         continue;
66                 if (!is_subarray(&ent->metadata_version[9]))
67                         continue;
68                 /* Skip first char - it can be '/' or '-' */
69                 if (strcmp(&ent->metadata_version[10], metadata_version+1) == 0) {
70                         busy = 1;
71                         break;
72                 }
73         }
74         free_mdstat(mdstat);
75
76         return busy;
77 }
78
79 static int ident_matches(struct mddev_ident *ident,
80                          struct mdinfo *content,
81                          struct supertype *tst,
82                          char *homehost, int require_homehost,
83                          char *update, char *devname)
84 {
85
86         if (ident->uuid_set && (!update || strcmp(update, "uuid")!= 0) &&
87             same_uuid(content->uuid, ident->uuid, tst->ss->swapuuid)==0 &&
88             memcmp(content->uuid, uuid_zero, sizeof(int[4])) != 0) {
89                 if (devname)
90                         pr_err("%s has wrong uuid.\n", devname);
91                 return 0;
92         }
93         if (ident->name[0] && (!update || strcmp(update, "name")!= 0) &&
94             name_matches(content->name, ident->name, homehost, require_homehost)==0) {
95                 if (devname)
96                         pr_err("%s has wrong name.\n", devname);
97                 return 0;
98         }
99         if (ident->super_minor != UnSet &&
100             ident->super_minor != content->array.md_minor) {
101                 if (devname)
102                         pr_err("%s has wrong super-minor.\n",
103                                devname);
104                 return 0;
105         }
106         if (ident->level != UnSet &&
107             ident->level != content->array.level) {
108                 if (devname)
109                         pr_err("%s has wrong raid level.\n",
110                                devname);
111                 return 0;
112         }
113         if (ident->raid_disks != UnSet &&
114             content->array.raid_disks != 0 && /* metadata doesn't know how many to expect */
115             ident->raid_disks!= content->array.raid_disks) {
116                 if (devname)
117                         pr_err("%s requires wrong number of drives.\n",
118                                devname);
119                 return 0;
120         }
121         if (ident->member && ident->member[0]) {
122                 /* content->text_version must match */
123                 char *s = strchr(content->text_version+1, '/');
124                 if (s == NULL) {
125                         if (devname)
126                                 pr_err("%s is not a container and one is required.\n",
127                                        devname);
128                         return 0;
129                 } else if (strcmp(ident->member, s+1) != 0) {
130                         if (devname)
131                                 pr_err("skipping wrong member %s is %s\n",
132                                        content->text_version, devname);
133                         return 0;
134                 }
135         }
136         return 1;
137 }
138
139 static int select_devices(struct mddev_dev *devlist,
140                           struct mddev_ident *ident,
141                           struct supertype **stp,
142                           struct mdinfo **contentp,
143                           struct context *c,
144                           int inargv, int auto_assem)
145 {
146         struct mddev_dev *tmpdev;
147         int num_devs;
148         struct supertype *st = *stp;
149         struct mdinfo *content = NULL;
150         int report_mismatch = ((inargv && c->verbose >= 0) || c->verbose > 0);
151         struct domainlist *domains = NULL;
152
153         tmpdev = devlist; num_devs = 0;
154         while (tmpdev) {
155                 if (tmpdev->used)
156                         tmpdev->used = 2;
157                 else
158                         num_devs++;
159                 tmpdev->disposition = 0;
160                 tmpdev = tmpdev->next;
161         }
162
163         /* first walk the list of devices to find a consistent set
164          * that match the criterea, if that is possible.
165          * We flag the ones we like with 'used'.
166          */
167         for (tmpdev = devlist;
168              tmpdev;
169              tmpdev = tmpdev ? tmpdev->next : NULL) {
170                 char *devname = tmpdev->devname;
171                 int dfd;
172                 struct stat stb;
173                 struct supertype *tst;
174                 struct dev_policy *pol = NULL;
175                 int found_container = 0;
176
177                 if (tmpdev->used > 1)
178                         continue;
179
180                 if (ident->container) {
181                         if (ident->container[0] == '/' &&
182                             !same_dev(ident->container, devname)) {
183                                 if (report_mismatch)
184                                         pr_err("%s is not the container required (%s)\n",
185                                                devname, ident->container);
186                                 continue;
187                         }
188                 } else if (ident->devices &&
189                            !match_oneof(ident->devices, devname)) {
190                         /* Note that we ignore the "device=" identifier if a
191                          * "container=" is given.  Checking both is unnecessarily
192                          * complicated.
193                          */
194                         if (report_mismatch)
195                                 pr_err("%s is not one of %s\n", devname, ident->devices);
196                         continue;
197                 }
198
199                 tst = dup_super(st);
200
201                 dfd = dev_open(devname, O_RDONLY);
202                 if (dfd < 0) {
203                         if (report_mismatch)
204                                 pr_err("cannot open device %s: %s\n",
205                                        devname, strerror(errno));
206                         tmpdev->used = 2;
207                 } else if (fstat(dfd, &stb)< 0) {
208                         /* Impossible! */
209                         pr_err("fstat failed for %s: %s\n",
210                                devname, strerror(errno));
211                         tmpdev->used = 2;
212                 } else if ((stb.st_mode & S_IFMT) != S_IFBLK) {
213                         pr_err("%s is not a block device.\n",
214                                devname);
215                         tmpdev->used = 2;
216                 } else if (must_be_container(dfd)) {
217                         if (st) {
218                                 /* already found some components, this cannot
219                                  * be another one.
220                                  */
221                                 if (report_mismatch)
222                                         pr_err("%s is a container, but we are looking for components\n",
223                                                devname);
224                                 tmpdev->used = 2;
225 #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO)
226                         } if (!tst && (tst = super_by_fd(dfd, NULL)) == NULL) {
227                                 if (report_mismatch)
228                                         pr_err("not a recognisable container: %s\n",
229                                                devname);
230                                 tmpdev->used = 2;
231 #endif
232                         } else if (!tst->ss->load_container
233                                    || tst->ss->load_container(tst, dfd, NULL)) {
234                                 if (report_mismatch)
235                                         pr_err("no correct container type: %s\n",
236                                                devname);
237                                 tmpdev->used = 2;
238                         } else if (auto_assem &&
239                                    !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
240                                                        tst->ss->match_home(tst, c->homehost) == 1)) {
241                                 if (report_mismatch)
242                                         pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
243                                                devname, tst->ss->name);
244                                 tmpdev->used = 2;
245                         } else
246                                 found_container = 1;
247                 } else {
248                         if (!tst && (tst = guess_super(dfd)) == NULL) {
249                                 if (report_mismatch)
250                                         pr_err("no recogniseable superblock on %s\n",
251                                                devname);
252                                 tmpdev->used = 2;
253                         } else if ((tst->ignore_hw_compat = 0),
254                                    tst->ss->load_super(tst, dfd,
255                                                        report_mismatch ? devname : NULL)) {
256                                 if (report_mismatch)
257                                         pr_err("no RAID superblock on %s\n",
258                                                devname);
259                                 tmpdev->used = 2;
260                         } else if (tst->ss->compare_super == NULL) {
261                                 if (report_mismatch)
262                                         pr_err("Cannot assemble %s metadata on %s\n",
263                                                tst->ss->name, devname);
264                                 tmpdev->used = 2;
265                         } else if (auto_assem && st == NULL &&
266                                    !conf_test_metadata(tst->ss->name, (pol = devid_policy(stb.st_rdev)),
267                                                        tst->ss->match_home(tst, c->homehost) == 1)) {
268                                 if (report_mismatch)
269                                         pr_err("%s has metadata type %s for which auto-assembly is disabled\n",
270                                                devname, tst->ss->name);
271                                 tmpdev->used = 2;
272                         }
273                 }
274                 if (dfd >= 0) close(dfd);
275                 if (tmpdev->used == 2) {
276                         if (auto_assem || !inargv)
277                                 /* Ignore unrecognised devices during auto-assembly */
278                                 goto loop;
279                         if (ident->uuid_set || ident->name[0] ||
280                             ident->super_minor != UnSet)
281                                 /* Ignore unrecognised device if looking for
282                                  * specific array */
283                                 goto loop;
284
285                         pr_err("%s has no superblock - assembly aborted\n",
286                                devname);
287                         if (st)
288                                 st->ss->free_super(st);
289                         dev_policy_free(pol);
290                         domain_free(domains);
291                         return -1;
292                 }
293
294                 if (found_container) {
295                         /* tmpdev is a container.  We need to be either
296                          * looking for a member, or auto-assembling
297                          */
298                         /* should be safe to try an exclusive open now, we
299                          * have rejected anything that some other mdadm might
300                          * be looking at
301                          */
302                         dfd = dev_open(devname, O_RDONLY | O_EXCL);
303                         if (dfd < 0) {
304                                 if (report_mismatch)
305                                         pr_err("%s is busy - skipping\n", devname);
306                                 goto loop;
307                         }
308                         close(dfd);
309
310                         if (ident->container && ident->container[0] != '/') {
311                                 /* we have a uuid */
312                                 int uuid[4];
313
314                                 content = *contentp;
315                                 tst->ss->getinfo_super(tst, content, NULL);
316
317                                 if (!parse_uuid(ident->container, uuid) ||
318                                     !same_uuid(content->uuid, uuid, tst->ss->swapuuid)) {
319                                         if (report_mismatch)
320                                                 pr_err("%s has wrong UUID to be required container\n",
321                                                        devname);
322                                         goto loop;
323                                 }
324                         }
325                         /* It is worth looking inside this container.
326                          */
327                         if (c->verbose > 0)
328                                 pr_err("looking in container %s\n",
329                                        devname);
330
331                         for (content = tst->ss->container_content(tst, NULL);
332                              content;
333                              content = content->next) {
334
335                                 if (!ident_matches(ident, content, tst,
336                                                    c->homehost, c->require_homehost,
337                                                    c->update,
338                                                    report_mismatch ? devname : NULL))
339                                         /* message already printed */;
340                                 else if (is_member_busy(content->text_version)) {
341                                         if (report_mismatch)
342                                                 pr_err("member %s in %s is already assembled\n",
343                                                        content->text_version,
344                                                        devname);
345                                 } else if (content->array.state & (1<<MD_SB_BLOCK_VOLUME)) {
346                                         /* do not assemble arrays with unsupported configurations */
347                                         pr_err("Cannot activate member %s in %s.\n",
348                                                content->text_version,
349                                                devname);
350                                 } else
351                                         break;
352                         }
353                         if (!content) {
354                                 tmpdev->used = 2;
355                                 goto loop; /* empty container */
356                         }
357
358                         st = tst; tst = NULL;
359                         if (!auto_assem && inargv && tmpdev->next != NULL) {
360                                 pr_err("%s is a container, but is not only device given: confused and aborting\n",
361                                        devname);
362                                 st->ss->free_super(st);
363                                 dev_policy_free(pol);
364                                 domain_free(domains);
365                                 return -1;
366                         }
367                         if (c->verbose > 0)
368                                 pr_err("found match on member %s in %s\n",
369                                        content->text_version, devname);
370
371                         /* make sure we finished the loop */
372                         tmpdev = NULL;
373                         goto loop;
374                 } else {
375                         content = *contentp;
376                         tst->ss->getinfo_super(tst, content, NULL);
377
378                         if (!ident_matches(ident, content, tst,
379                                            c->homehost, c->require_homehost,
380                                            c->update,
381                                            report_mismatch ? devname : NULL))
382                                 goto loop;
383
384                         if (auto_assem) {
385                                 /* Never auto-assemble things that conflict
386                                  * with mdadm.conf in some way
387                                  */
388                                 struct mddev_ident *match;
389                                 int rv = 0;
390
391                                 match = conf_match(tst, content, devname,
392                                                    report_mismatch ? c->verbose : -1,
393                                                    &rv);
394                                 if (!match && rv == 2)
395                                         goto loop;
396                                 if (match && match->devname &&
397                                     strcasecmp(match->devname, "<ignore>") == 0) {
398                                         if (report_mismatch)
399                                                 pr_err("%s is a member of an explicitly ignored array\n",
400                                                        devname);
401                                         goto loop;
402                                 }
403                                 if (match && !ident_matches(match, content, tst,
404                                                             c->homehost, c->require_homehost,
405                                                             c->update,
406                                                             report_mismatch ? devname : NULL))
407                                         /* Array exists  in mdadm.conf but some
408                                          * details don't match, so reject it
409                                          */
410                                         goto loop;
411                         }
412
413                         /* should be safe to try an exclusive open now, we
414                          * have rejected anything that some other mdadm might
415                          * be looking at
416                          */
417                         dfd = dev_open(devname, O_RDONLY | O_EXCL);
418                         if (dfd < 0) {
419                                 if (report_mismatch)
420                                         pr_err("%s is busy - skipping\n", devname);
421                                 goto loop;
422                         }
423                         close(dfd);
424
425                         if (st == NULL)
426                                 st = dup_super(tst);
427                         if (st->minor_version == -1)
428                                 st->minor_version = tst->minor_version;
429
430                         if (memcmp(content->uuid, uuid_zero,
431                                    sizeof(int[4])) == 0) {
432                                 /* this is a floating spare.  It cannot define
433                                  * an array unless there are no more arrays of
434                                  * this type to be found.  It can be included
435                                  * in an array of this type though.
436                                  */
437                                 tmpdev->used = 3;
438                                 goto loop;
439                         }
440
441                         if (st->ss != tst->ss ||
442                             st->minor_version != tst->minor_version ||
443                             st->ss->compare_super(st, tst) != 0) {
444                                 /* Some mismatch. If exactly one array matches this host,
445                                  * we can resolve on that one.
446                                  * Or, if we are auto assembling, we just ignore the second
447                                  * for now.
448                                  */
449                                 if (auto_assem)
450                                         goto loop;
451                                 if (c->homehost) {
452                                         int first = st->ss->match_home(st, c->homehost);
453                                         int last = tst->ss->match_home(tst, c->homehost);
454                                         if (first != last &&
455                                             (first == 1 || last == 1)) {
456                                                 /* We can do something */
457                                                 if (first) {/* just ignore this one */
458                                                         if (report_mismatch)
459                                                                 pr_err("%s misses out due to wrong homehost\n",
460                                                                        devname);
461                                                         goto loop;
462                                                 } else { /* reject all those sofar */
463                                                         struct mddev_dev *td;
464                                                         if (report_mismatch)
465                                                                 pr_err("%s overrides previous devices due to good homehost\n",
466                                                                        devname);
467                                                         for (td=devlist; td != tmpdev; td=td->next)
468                                                                 if (td->used == 1)
469                                                                         td->used = 0;
470                                                         tmpdev->used = 1;
471                                                         goto loop;
472                                                 }
473                                         }
474                                 }
475                                 pr_err("superblock on %s doesn't match others - assembly aborted\n",
476                                        devname);
477                                 tst->ss->free_super(tst);
478                                 st->ss->free_super(st);
479                                 dev_policy_free(pol);
480                                 domain_free(domains);
481                                 return -1;
482                         }
483                         tmpdev->used = 1;
484                 }
485         loop:
486                 /* Collect domain information from members only */
487                 if (tmpdev && tmpdev->used == 1) {
488                         if (!pol)
489                                 pol = devid_policy(stb.st_rdev);
490                         domain_merge(&domains, pol, tst?tst->ss->name:NULL);
491                 }
492                 dev_policy_free(pol);
493                 pol = NULL;
494                 if (tst)
495                         tst->ss->free_super(tst);
496         }
497
498         /* Check if we found some imsm spares but no members */
499         if ((auto_assem ||
500              (ident->uuid_set &&
501               memcmp(uuid_zero, ident->uuid,sizeof(uuid_zero)) == 0)) &&
502             (!st || !st->sb))
503                 for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
504                         if (tmpdev->used != 3)
505                                 continue;
506                         tmpdev->used = 1;
507                         content = *contentp;
508
509                         if (!st->sb) {
510                                 /* we need sb from one of the spares */
511                                 int dfd = dev_open(tmpdev->devname, O_RDONLY);
512                                 if (dfd < 0 ||
513                                     st->ss->load_super(st, dfd, NULL))
514                                         tmpdev->used = 2;
515                                 if (dfd > 0)
516                                         close(dfd);
517                         }
518                 }
519
520         /* Now reject spares that don't match domains of identified members */
521         for (tmpdev = devlist; tmpdev; tmpdev = tmpdev->next) {
522                 struct stat stb;
523                 if (tmpdev->used != 3)
524                         continue;
525                 if (stat(tmpdev->devname, &stb)< 0) {
526                         pr_err("fstat failed for %s: %s\n",
527                                tmpdev->devname, strerror(errno));
528                         tmpdev->used = 2;
529                 } else {
530                         struct dev_policy *pol = devid_policy(stb.st_rdev);
531                         int dt = domain_test(domains, pol, NULL);
532                         if (inargv && dt != 0)
533                                 /* take this spare as domains match
534                                  * if there are any */
535                                 tmpdev->used = 1;
536                         else if (!inargv && dt == 1)
537                                 /* device wasn't explicitly listed, so need
538                                  * explicit domain match - which we have */
539                                 tmpdev->used = 1;
540                         else
541                                 /* if domains don't match mark as unused */
542                                 tmpdev->used = 0;
543                         dev_policy_free(pol);
544                 }
545         }
546         domain_free(domains);
547         *stp = st;
548         if (st && st->sb && content == *contentp)
549                 st->ss->getinfo_super(st, content, NULL);
550         *contentp = content;
551
552         return num_devs;
553 }
554
555 struct devs {
556         char *devname;
557         int uptodate; /* set once we decide that this device is as
558                        * recent as everything else in the array.
559                        */
560         int included; /* set if the device is already in the array
561                        * due to a previous '-I'
562                        */
563         struct mdinfo i;
564 };
565
566 static int load_devices(struct devs *devices, char *devmap,
567                         struct mddev_ident *ident, struct supertype **stp,
568                         struct mddev_dev *devlist, struct context *c,
569                         struct mdinfo *content,
570                         int mdfd, char *mddev,
571                         int *most_recentp, int *bestcntp, int **bestp,
572                         int inargv)
573 {
574         struct mddev_dev *tmpdev;
575         int devcnt = 0;
576         int nextspare = 0;
577 #ifndef MDASSEMBLE
578         int bitmap_done = 0;
579 #endif
580         int most_recent = -1;
581         int bestcnt = 0;
582         int *best = *bestp;
583         struct supertype *st = *stp;
584
585         for (tmpdev = devlist; tmpdev; tmpdev=tmpdev->next) {
586                 char *devname = tmpdev->devname;
587                 struct stat stb;
588                 struct supertype *tst;
589                 int i;
590                 int dfd;
591
592                 if (tmpdev->used != 1)
593                         continue;
594                 /* looks like a good enough match to update the super block if needed */
595 #ifndef MDASSEMBLE
596                 if (c->update) {
597                         /* prepare useful information in info structures */
598                         struct stat stb2;
599                         int err;
600                         fstat(mdfd, &stb2);
601
602                         if (strcmp(c->update, "uuid") == 0 && !ident->uuid_set)
603                                 random_uuid((__u8 *)ident->uuid);
604
605                         dfd = dev_open(devname,
606                                        tmpdev->disposition == 'I'
607                                        ? O_RDWR : (O_RDWR|O_EXCL));
608
609                         tst = dup_super(st);
610                         if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
611                                 pr_err("cannot re-read metadata from %s - aborting\n",
612                                        devname);
613                                 if (dfd >= 0)
614                                         close(dfd);
615                                 close(mdfd);
616                                 free(devices);
617                                 free(devmap);
618                                 tst->ss->free_super(tst);
619                                 free(tst);
620                                 *stp = st;
621                                 return -1;
622                         }
623                         tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
624
625                         memcpy(content->uuid, ident->uuid, 16);
626                         strcpy(content->name, ident->name);
627                         content->array.md_minor = minor(stb2.st_rdev);
628
629                         if (strcmp(c->update, "byteorder") == 0)
630                                 err = 0;
631                         else if (strcmp(c->update, "home-cluster") == 0) {
632                                 tst->cluster_name = c->homecluster;
633                                 err = tst->ss->write_bitmap(tst, dfd, NameUpdate);
634                         } else if (strcmp(c->update, "nodes") == 0) {
635                                 tst->nodes = c->nodes;
636                                 err = tst->ss->write_bitmap(tst, dfd, NodeNumUpdate);
637                         } else if (strcmp(c->update, "revert-reshape") == 0 &&
638                                    c->invalid_backup)
639                                 err = tst->ss->update_super(tst, content,
640                                                             "revert-reshape-nobackup",
641                                                             devname, c->verbose,
642                                                             ident->uuid_set,
643                                                             c->homehost);
644                         else
645                                 err = tst->ss->update_super(tst, content, c->update,
646                                                             devname, c->verbose,
647                                                             ident->uuid_set,
648                                                             c->homehost);
649                         if (err < 0) {
650                                 if (err == -1)
651                                         pr_err("--update=%s not understood for %s metadata\n",
652                                                c->update, tst->ss->name);
653                                 tst->ss->free_super(tst);
654                                 free(tst);
655                                 close(mdfd);
656                                 close(dfd);
657                                 free(devices);
658                                 free(devmap);
659                                 *stp = st;
660                                 return -1;
661                         }
662                         if (strcmp(c->update, "uuid")==0 &&
663                             !ident->uuid_set) {
664                                 ident->uuid_set = 1;
665                                 memcpy(ident->uuid, content->uuid, 16);
666                         }
667                         if (tst->ss->store_super(tst, dfd))
668                                 pr_err("Could not re-write superblock on %s.\n",
669                                        devname);
670
671                         if (strcmp(c->update, "uuid")==0 &&
672                             ident->bitmap_fd >= 0 && !bitmap_done) {
673                                 if (bitmap_update_uuid(ident->bitmap_fd,
674                                                        content->uuid,
675                                                        tst->ss->swapuuid) != 0)
676                                         pr_err("Could not update uuid on external bitmap.\n");
677                                 else
678                                         bitmap_done = 1;
679                         }
680                 } else
681 #endif
682                 {
683                         dfd = dev_open(devname,
684                                        tmpdev->disposition == 'I'
685                                        ? O_RDWR : (O_RDWR|O_EXCL));
686                         tst = dup_super(st);
687
688                         if (dfd < 0 || tst->ss->load_super(tst, dfd, NULL) != 0) {
689                                 pr_err("cannot re-read metadata from %s - aborting\n",
690                                        devname);
691                                 if (dfd >= 0)
692                                         close(dfd);
693                                 close(mdfd);
694                                 free(devices);
695                                 free(devmap);
696                                 tst->ss->free_super(tst);
697                                 free(tst);
698                                 *stp = st;
699                                 return -1;
700                         }
701                         tst->ss->getinfo_super(tst, content, devmap + devcnt * content->array.raid_disks);
702                 }
703
704                 fstat(dfd, &stb);
705                 close(dfd);
706
707                 if (c->verbose > 0)
708                         pr_err("%s is identified as a member of %s, slot %d%s.\n",
709                                devname, mddev, content->disk.raid_disk,
710                                (content->disk.state & (1<<MD_DISK_REPLACEMENT)) ? " replacement":"");
711                 devices[devcnt].devname = devname;
712                 devices[devcnt].uptodate = 0;
713                 devices[devcnt].included = (tmpdev->disposition == 'I');
714                 devices[devcnt].i = *content;
715                 devices[devcnt].i.disk.major = major(stb.st_rdev);
716                 devices[devcnt].i.disk.minor = minor(stb.st_rdev);
717
718                 if (devices[devcnt].i.disk.state == 6) {
719                         if (most_recent < 0 ||
720                             devices[devcnt].i.events
721                             > devices[most_recent].i.events) {
722                                 struct supertype *tmp = tst;
723                                 tst = st;
724                                 st = tmp;
725                                 most_recent = devcnt;
726                         }
727                 }
728                 tst->ss->free_super(tst);
729                 free(tst);
730
731                 if (content->array.level == LEVEL_MULTIPATH)
732                         /* with multipath, the raid_disk from the superblock is meaningless */
733                         i = devcnt;
734                 else
735                         i = devices[devcnt].i.disk.raid_disk;
736                 if (i+1 == 0 || i == MD_DISK_ROLE_JOURNAL) {
737                         if (nextspare < content->array.raid_disks*2)
738                                 nextspare = content->array.raid_disks*2;
739                         i = nextspare++;
740                 } else {
741                         /* i is raid_disk - double it so there is room for
742                          * replacements */
743                         i *= 2;
744                         if (devices[devcnt].i.disk.state & (1<<MD_DISK_REPLACEMENT))
745                                 i++;
746                         if (i >= content->array.raid_disks*2 &&
747                             i >= nextspare)
748                                 nextspare = i+1;
749                 }
750                 if (i < 10000) {
751                         if (i >= bestcnt) {
752                                 int newbestcnt = i+10;
753                                 int *newbest = xmalloc(sizeof(int)*newbestcnt);
754                                 int c;
755                                 for (c=0; c < newbestcnt; c++)
756                                         if (c < bestcnt)
757                                                 newbest[c] = best[c];
758                                         else
759                                                 newbest[c] = -1;
760                                 if (best)free(best);
761                                 best = newbest;
762                                 bestcnt = newbestcnt;
763                         }
764                         if (best[i] >=0 &&
765                             devices[best[i]].i.events
766                             == devices[devcnt].i.events
767                             && (devices[best[i]].i.disk.minor
768                                 != devices[devcnt].i.disk.minor)
769                             && st->ss == &super0
770                             && content->array.level != LEVEL_MULTIPATH) {
771                                 /* two different devices with identical superblock.
772                                  * Could be a mis-detection caused by overlapping
773                                  * partitions.  fail-safe.
774                                  */
775                                 pr_err("WARNING %s and %s appear to have very similar superblocks.\n"
776                                        "      If they are really different, please --zero the superblock on one\n"
777                                        "      If they are the same or overlap, please remove one from %s.\n",
778                                        devices[best[i]].devname, devname,
779                                        inargv ? "the list" :
780                                        "the\n      DEVICE list in mdadm.conf"
781                                         );
782                                 close(mdfd);
783                                 free(devices);
784                                 free(devmap);
785                                 *stp = st;
786                                 return -1;
787                         }
788                         if (best[i] == -1
789                             || (devices[best[i]].i.events
790                                 < devices[devcnt].i.events))
791                                 best[i] = devcnt;
792                 }
793                 devcnt++;
794         }
795         if (most_recent >= 0)
796                 *most_recentp = most_recent;
797         *bestcntp = bestcnt;
798         *bestp = best;
799         *stp = st;
800         return devcnt;
801 }
802
803 static int force_array(struct mdinfo *content,
804                        struct devs *devices,
805                        int *best, int bestcnt, char *avail,
806                        int most_recent,
807                        struct supertype *st,
808                        struct context *c)
809 {
810         int okcnt = 0;
811         while (!enough(content->array.level, content->array.raid_disks,
812                        content->array.layout, 1,
813                        avail)
814                ||
815                (content->reshape_active && content->delta_disks > 0 &&
816                 !enough(content->array.level, (content->array.raid_disks
817                                                - content->delta_disks),
818                         content->new_layout, 1,
819                         avail)
820                        )) {
821                 /* Choose the newest best drive which is
822                  * not up-to-date, update the superblock
823                  * and add it.
824                  */
825                 int fd;
826                 struct supertype *tst;
827                 unsigned long long current_events;
828                 int chosen_drive = -1;
829                 int i;
830
831                 for (i = 0;
832                      i < content->array.raid_disks * 2 && i < bestcnt;
833                      i += 2) {
834                         int j = best[i];
835                         if (j < 0)
836                                 continue;
837                         if (devices[j].uptodate)
838                                 continue;
839                         if (devices[j].i.recovery_start != MaxSector) {
840                                 int delta;
841                                 if (!devices[j].i.reshape_active ||
842                                     devices[j].i.delta_disks <= 0)
843                                         continue;
844                                 /* When increasing number of devices, an
845                                  * added device also appears to be
846                                  * recovering.  It is safe to include it
847                                  * as long as it won't be a source of
848                                  * data.
849                                  * For now, just allow for last data
850                                  * devices in RAID4 or last devices in RAID4/5/6.
851                                  */
852                                 delta = devices[j].i.delta_disks;
853                                 if (devices[j].i.array.level >= 4 &&
854                                     devices[j].i.array.level <= 6 &&
855                                     i/2 >= content->array.raid_disks - delta)
856                                         /* OK */;
857                                 else if (devices[j].i.array.level == 4 &&
858                                          i/2 >= content->array.raid_disks - delta - 1)
859                                         /* OK */;
860                                 else
861                                         continue;
862                         }
863                         if (chosen_drive < 0 ||
864                              devices[j].i.events
865                             > devices[chosen_drive].i.events)
866                                 chosen_drive = j;
867                 }
868                 if (chosen_drive < 0)
869                         break;
870                 current_events = devices[chosen_drive].i.events;
871         add_another:
872                 if (c->verbose >= 0)
873                         pr_err("forcing event count in %s(%d) from %d upto %d\n",
874                                devices[chosen_drive].devname,
875                                devices[chosen_drive].i.disk.raid_disk,
876                                (int)(devices[chosen_drive].i.events),
877                                (int)(devices[most_recent].i.events));
878                 fd = dev_open(devices[chosen_drive].devname,
879                               devices[chosen_drive].included ? O_RDWR
880                               : (O_RDWR|O_EXCL));
881                 if (fd < 0) {
882                         pr_err("Couldn't open %s for write - not updating\n",
883                                devices[chosen_drive].devname);
884                         devices[chosen_drive].i.events = 0;
885                         continue;
886                 }
887                 tst = dup_super(st);
888                 if (tst->ss->load_super(tst,fd, NULL)) {
889                         close(fd);
890                         pr_err("RAID superblock disappeared from %s - not updating.\n",
891                                devices[chosen_drive].devname);
892                         devices[chosen_drive].i.events = 0;
893                         continue;
894                 }
895                 content->events = devices[most_recent].i.events;
896                 tst->ss->update_super(tst, content, "force-one",
897                                       devices[chosen_drive].devname, c->verbose,
898                                       0, NULL);
899
900                 if (tst->ss->store_super(tst, fd)) {
901                         close(fd);
902                         pr_err("Could not re-write superblock on %s\n",
903                                devices[chosen_drive].devname);
904                         devices[chosen_drive].i.events = 0;
905                         tst->ss->free_super(tst);
906                         continue;
907                 }
908                 close(fd);
909                 devices[chosen_drive].i.events = devices[most_recent].i.events;
910                 devices[chosen_drive].uptodate = 1;
911                 avail[chosen_drive] = 1;
912                 okcnt++;
913                 tst->ss->free_super(tst);
914                 /* If there are any other drives of the same vintage,
915                  * add them in as well.  We can't lose and we might gain
916                  */
917                 for (i = 0;
918                      i < content->array.raid_disks * 2 && i < bestcnt ;
919                      i += 2) {
920                         int j = best[i];
921                         if (j >= 0 &&
922                             !devices[j].uptodate &&
923                             devices[j].i.recovery_start == MaxSector &&
924                             devices[j].i.events == current_events) {
925                                 chosen_drive = j;
926                                 goto add_another;
927                         }
928                 }
929         }
930         return okcnt;
931 }
932
933 static int start_array(int mdfd,
934                        char *mddev,
935                        struct mdinfo *content,
936                        struct supertype *st,
937                        struct mddev_ident *ident,
938                        int *best, int bestcnt,
939                        int chosen_drive,
940                        struct devs *devices,
941                        unsigned int okcnt,
942                        unsigned int sparecnt,
943                        unsigned int rebuilding_cnt,
944                        unsigned int journalcnt,
945                        struct context *c,
946                        int clean, char *avail,
947                        int start_partial_ok,
948                        int err_ok,
949                        int was_forced
950         )
951 {
952         int rv;
953         int i;
954         unsigned int req_cnt;
955
956         if (content->journal_device_required && (content->journal_clean == 0)) {
957                 if (!c->force) {
958                         pr_err("Not safe to assemble with missing or stale journal device, consider --force.\n");
959                         return 1;
960                 }
961                 pr_err("Journal is missing or stale, starting array read only.\n");
962                 c->readonly = 1;
963         }
964
965         rv = set_array_info(mdfd, st, content);
966         if (rv && !err_ok) {
967                 pr_err("failed to set array info for %s: %s\n",
968                        mddev, strerror(errno));
969                 return 1;
970         }
971         if (ident->bitmap_fd >= 0) {
972                 if (ioctl(mdfd, SET_BITMAP_FILE, ident->bitmap_fd) != 0) {
973                         pr_err("SET_BITMAP_FILE failed.\n");
974                         return 1;
975                 }
976         } else if (ident->bitmap_file) {
977                 /* From config file */
978                 int bmfd = open(ident->bitmap_file, O_RDWR);
979                 if (bmfd < 0) {
980                         pr_err("Could not open bitmap file %s\n",
981                                ident->bitmap_file);
982                         return 1;
983                 }
984                 if (ioctl(mdfd, SET_BITMAP_FILE, bmfd) != 0) {
985                         pr_err("Failed to set bitmapfile for %s\n", mddev);
986                         close(bmfd);
987                         return 1;
988                 }
989                 close(bmfd);
990         }
991
992         /* First, add the raid disks, but add the chosen one last */
993         for (i=0; i<= bestcnt; i++) {
994                 int j;
995                 if (i < bestcnt) {
996                         j = best[i];
997                         if (j == chosen_drive)
998                                 continue;
999                 } else
1000                         j = chosen_drive;
1001
1002                 if (j >= 0 && !devices[j].included) {
1003                         int dfd = dev_open(devices[j].devname,
1004                                            O_RDWR|O_EXCL);
1005                         if (dfd >= 0) {
1006                                 remove_partitions(dfd);
1007                                 close(dfd);
1008                         }
1009                         rv = add_disk(mdfd, st, content, &devices[j].i);
1010
1011                         if (rv) {
1012                                 pr_err("failed to add %s to %s: %s\n",
1013                                        devices[j].devname,
1014                                        mddev,
1015                                        strerror(errno));
1016                                 if (i < content->array.raid_disks * 2
1017                                     || i == bestcnt)
1018                                         okcnt--;
1019                                 else
1020                                         sparecnt--;
1021                         } else if (c->verbose > 0)
1022                                 pr_err("added %s to %s as %d%s%s\n",
1023                                        devices[j].devname, mddev,
1024                                        devices[j].i.disk.raid_disk,
1025                                        devices[j].uptodate?"":
1026                                        " (possibly out of date)",
1027                                        (devices[j].i.disk.state & (1<<MD_DISK_REPLACEMENT))?" replacement":"");
1028                 } else if (j >= 0) {
1029                         if (c->verbose > 0)
1030                                 pr_err("%s is already in %s as %d\n",
1031                                        devices[j].devname, mddev,
1032                                        devices[j].i.disk.raid_disk);
1033                 } else if (c->verbose > 0 && i < content->array.raid_disks*2
1034                            && (i&1) == 0)
1035                         pr_err("no uptodate device for slot %d of %s\n",
1036                                i/2, mddev);
1037         }
1038
1039         if (content->array.level == LEVEL_CONTAINER) {
1040                 if (c->verbose >= 0) {
1041                         pr_err("Container %s has been assembled with %d drive%s",
1042                                mddev, okcnt+sparecnt+journalcnt,
1043                                okcnt+sparecnt+journalcnt==1?"":"s");
1044                         if (okcnt < (unsigned)content->array.raid_disks)
1045                                 fprintf(stderr, " (out of %d)",
1046                                         content->array.raid_disks);
1047                         fprintf(stderr, "\n");
1048                 }
1049
1050                 if (st->ss->validate_container) {
1051                         struct mdinfo *devices_list;
1052                         struct mdinfo *info_devices = xmalloc(sizeof(struct mdinfo)*(okcnt+sparecnt));
1053                         unsigned int count;
1054                         devices_list = NULL;
1055                         for (count = 0; count < okcnt+sparecnt; count++) {
1056                                 info_devices[count] = devices[count].i;
1057                                 info_devices[count].next = devices_list;
1058                                 devices_list = &info_devices[count];
1059                         }
1060                         if (st->ss->validate_container(devices_list))
1061                                 pr_err("Mismatch detected!\n");
1062                         free(info_devices);
1063                 }
1064
1065                 st->ss->free_super(st);
1066                 sysfs_uevent(content, "change");
1067                 if (err_ok && okcnt < (unsigned)content->array.raid_disks)
1068                         /* Was partial, is still partial, so signal an error
1069                          * to ensure we don't retry */
1070                         return 1;
1071                 return 0;
1072         }
1073
1074         /* Get number of in-sync devices according to the superblock.
1075          * We must have this number to start the array without -s or -R
1076          */
1077         req_cnt = content->array.working_disks;
1078
1079         if (c->runstop == 1 ||
1080             (c->runstop <= 0 &&
1081              ( enough(content->array.level, content->array.raid_disks,
1082                       content->array.layout, clean, avail) &&
1083                (okcnt + rebuilding_cnt >= req_cnt || start_partial_ok)
1084                      ))) {
1085                 /* This array is good-to-go.
1086                  * If a reshape is in progress then we might need to
1087                  * continue monitoring it.  In that case we start
1088                  * it read-only and let the grow code make it writable.
1089                  */
1090                 int rv;
1091 #ifndef MDASSEMBLE
1092                 if (content->reshape_active &&
1093                     !(content->reshape_active & RESHAPE_NO_BACKUP) &&
1094                     content->delta_disks <= 0) {
1095                         if (!c->backup_file) {
1096                                 pr_err("%s: Need a backup file to complete reshape of this array.\n",
1097                                        mddev);
1098                                 pr_err("Please provided one with \"--backup-file=...\"\n");
1099                                 if (c->update &&
1100                                     strcmp(c->update, "revert-reshape") == 0)
1101                                         pr_err("(Don't specify --update=revert-reshape again, that part succeeded.)\n");
1102                                 return 1;
1103                         }
1104                         rv = sysfs_set_str(content, NULL,
1105                                            "array_state", "readonly");
1106                         if (rv == 0)
1107                                 rv = Grow_continue(mdfd, st, content,
1108                                                    c->backup_file, 0,
1109                                                    c->freeze_reshape);
1110                 } else if (c->readonly &&
1111                            sysfs_attribute_available(
1112                                    content, NULL, "array_state")) {
1113                         rv = sysfs_set_str(content, NULL,
1114                                            "array_state", "readonly");
1115                 } else
1116 #endif
1117                         rv = ioctl(mdfd, RUN_ARRAY, NULL);
1118                 reopen_mddev(mdfd); /* drop O_EXCL */
1119                 if (rv == 0) {
1120                         if (c->verbose >= 0) {
1121                                 pr_err("%s has been started with %d drive%s",
1122                                        mddev, okcnt, okcnt==1?"":"s");
1123                                 if (okcnt < (unsigned)content->array.raid_disks)
1124                                         fprintf(stderr, " (out of %d)", content->array.raid_disks);
1125                                 if (rebuilding_cnt)
1126                                         fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt);
1127                                 if (sparecnt)
1128                                         fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
1129                                 if (content->journal_clean)
1130                                         fprintf(stderr, " and %d journal", journalcnt);
1131                                 fprintf(stderr, ".\n");
1132                         }
1133                         if (content->reshape_active &&
1134                             content->array.level >= 4 &&
1135                             content->array.level <= 6) {
1136                                 /* might need to increase the size
1137                                  * of the stripe cache - default is 256
1138                                  */
1139                                 int chunk_size = content->array.chunk_size;
1140                                 if (content->reshape_active &&
1141                                     content->new_chunk > chunk_size)
1142                                         chunk_size = content->new_chunk;
1143                                 if (256 < 4 * ((chunk_size+4065)/4096)) {
1144                                         struct mdinfo *sra = sysfs_read(mdfd, NULL, 0);
1145                                         if (sra)
1146                                                 sysfs_set_num(sra, NULL,
1147                                                               "stripe_cache_size",
1148                                                               (4 * chunk_size / 4096) + 1);
1149                                         sysfs_free(sra);
1150                                 }
1151                         }
1152                         if (okcnt < (unsigned)content->array.raid_disks) {
1153                                 /* If any devices did not get added
1154                                  * because the kernel rejected them based
1155                                  * on event count, try adding them
1156                                  * again providing the action policy is
1157                                  * 're-add' or greater.  The bitmap
1158                                  * might allow them to be included, or
1159                                  * they will become spares.
1160                                  */
1161                                 for (i = 0; i < bestcnt; i++) {
1162                                         int j = best[i];
1163                                         if (j >= 0 && !devices[j].uptodate) {
1164                                                 if (!disk_action_allows(&devices[j].i, st->ss->name, act_re_add))
1165                                                         continue;
1166                                                 rv = add_disk(mdfd, st, content,
1167                                                               &devices[j].i);
1168                                                 if (rv == 0 && c->verbose >= 0)
1169                                                         pr_err("%s has been re-added.\n",
1170                                                                devices[j].devname);
1171                                         }
1172                                 }
1173                         }
1174                         if (content->array.level == 6 &&
1175                             okcnt + 1 == (unsigned)content->array.raid_disks &&
1176                             was_forced) {
1177                                 struct mdinfo *sra = sysfs_read(mdfd, NULL, 0);
1178                                 if (sra)
1179                                         sysfs_set_str(sra, NULL,
1180                                                       "sync_action", "repair");
1181                                 sysfs_free(sra);
1182                         }
1183                         return 0;
1184                 }
1185                 pr_err("failed to RUN_ARRAY %s: %s\n",
1186                        mddev, strerror(errno));
1187
1188                 if (!enough(content->array.level, content->array.raid_disks,
1189                             content->array.layout, 1, avail))
1190                         pr_err("Not enough devices to start the array.\n");
1191                 else if (!enough(content->array.level,
1192                                  content->array.raid_disks,
1193                                  content->array.layout, clean,
1194                                  avail))
1195                         pr_err("Not enough devices to start the array while not clean - consider --force.\n");
1196
1197                 return 1;
1198         }
1199         if (c->runstop == -1) {
1200                 pr_err("%s assembled from %d drive%s",
1201                        mddev, okcnt, okcnt==1?"":"s");
1202                 if (okcnt != (unsigned)content->array.raid_disks)
1203                         fprintf(stderr, " (out of %d)", content->array.raid_disks);
1204                 fprintf(stderr, ", but not started.\n");
1205                 return 2;
1206         }
1207         if (c->verbose >= -1) {
1208                 pr_err("%s assembled from %d drive%s", mddev, okcnt, okcnt==1?"":"s");
1209                 if (rebuilding_cnt)
1210                         fprintf(stderr, "%s %d rebuilding", sparecnt?",":" and", rebuilding_cnt);
1211                 if (sparecnt)
1212                         fprintf(stderr, " and %d spare%s", sparecnt, sparecnt==1?"":"s");
1213                 if (!enough(content->array.level, content->array.raid_disks,
1214                             content->array.layout, 1, avail))
1215                         fprintf(stderr, " - not enough to start the array.\n");
1216                 else if (!enough(content->array.level,
1217                                  content->array.raid_disks,
1218                                  content->array.layout, clean,
1219                                  avail))
1220                         fprintf(stderr, " - not enough to start the array while not clean - consider --force.\n");
1221                 else {
1222                         if (req_cnt == (unsigned)content->array.raid_disks)
1223                                 fprintf(stderr, " - need all %d to start it", req_cnt);
1224                         else
1225                                 fprintf(stderr, " - need %d to start", req_cnt);
1226                         fprintf(stderr, " (use --run to insist).\n");
1227                 }
1228         }
1229         return 1;
1230 }
1231
1232 int Assemble(struct supertype *st, char *mddev,
1233              struct mddev_ident *ident,
1234              struct mddev_dev *devlist,
1235              struct context *c)
1236 {
1237         /*
1238          * The task of Assemble is to find a collection of
1239          * devices that should (according to their superblocks)
1240          * form an array, and to give this collection to the MD driver.
1241          * In Linux-2.4 and later, this involves submitting a
1242          * SET_ARRAY_INFO ioctl with no arg - to prepare
1243          * the array - and then submit a number of
1244          * ADD_NEW_DISK ioctls to add disks into
1245          * the array.  Finally RUN_ARRAY might
1246          * be submitted to start the array.
1247          *
1248          * Much of the work of Assemble is in finding and/or
1249          * checking the disks to make sure they look right.
1250          *
1251          * If mddev is not set, then scan must be set and we
1252          *  read through the config file for dev+uuid mapping
1253          *  We recurse, setting mddev, for each device that
1254          *    - isn't running
1255          *    - has a valid uuid (or any uuid if !uuidset)
1256          *
1257          * If mddev is set, we try to determine state of md.
1258          *   check version - must be at least 0.90.0
1259          *   check kernel version.  must be at least 2.4.
1260          *    If not, we can possibly fall back on START_ARRAY
1261          *   Try to GET_ARRAY_INFO.
1262          *     If possible, give up
1263          *     If not, try to STOP_ARRAY just to make sure
1264          *
1265          * If !uuidset and scan, look in conf-file for uuid
1266          *       If not found, give up
1267          * If !devlist and scan and uuidset, get list of devs from conf-file
1268          *
1269          * For each device:
1270          *   Check superblock - discard if bad
1271          *   Check uuid (set if we don't have one) - discard if no match
1272          *   Check superblock similarity if we have a superblock - discard if different
1273          *   Record events, devicenum
1274          * This should give us a list of devices for the array
1275          * We should collect the most recent event number
1276          *
1277          * Count disks with recent enough event count
1278          * While force && !enough disks
1279          *    Choose newest rejected disks, update event count
1280          *     mark clean and rewrite superblock
1281          * If recent kernel:
1282          *    SET_ARRAY_INFO
1283          *    foreach device with recent events : ADD_NEW_DISK
1284          *    if runstop == 1 || "enough" disks and runstop==0 -> RUN_ARRAY
1285          * If old kernel:
1286          *    Check the device numbers in superblock are right
1287          *    update superblock if any changes
1288          *    START_ARRAY
1289          *
1290          */
1291         int rv;
1292         int mdfd;
1293         int clean;
1294         int auto_assem = (mddev == NULL && !ident->uuid_set &&
1295                           ident->super_minor == UnSet && ident->name[0] == 0
1296                           && (ident->container == NULL || ident->member == NULL));
1297         struct devs *devices;
1298         char *devmap;
1299         int *best = NULL; /* indexed by raid_disk */
1300         int bestcnt = 0;
1301         int devcnt;
1302         unsigned int okcnt, sparecnt, rebuilding_cnt, replcnt, journalcnt;
1303         int journal_clean = 0;
1304         int i;
1305         int was_forced = 0;
1306         int most_recent = 0;
1307         int chosen_drive;
1308         int change = 0;
1309         int inargv = 0;
1310         int start_partial_ok = (c->runstop >= 0) &&
1311                 (c->force || devlist==NULL || auto_assem);
1312         int num_devs;
1313         struct mddev_dev *tmpdev;
1314         struct mdinfo info;
1315         struct mdinfo *content = NULL;
1316         struct mdinfo *pre_exist = NULL;
1317         char *avail;
1318         char *name = NULL;
1319         char chosen_name[1024];
1320         struct map_ent *map = NULL;
1321         struct map_ent *mp;
1322
1323         /*
1324          * If any subdevs are listed, then any that don't
1325          * match ident are discarded.  Remainder must all match and
1326          * become the array.
1327          * If no subdevs, then we scan all devices in the config file, but
1328          * there must be something in the identity
1329          */
1330
1331         if (!devlist &&
1332             ident->uuid_set == 0 &&
1333             (ident->super_minor < 0 || ident->super_minor == UnSet) &&
1334             ident->name[0] == 0 &&
1335             (ident->container == NULL || ident->member == NULL) &&
1336             ident->devices == NULL) {
1337                 pr_err("No identity information available for %s - cannot assemble.\n",
1338                        mddev ? mddev : "further assembly");
1339                 return 1;
1340         }
1341
1342         if (devlist == NULL)
1343                 devlist = conf_get_devs();
1344         else if (mddev)
1345                 inargv = 1;
1346
1347 try_again:
1348         /* We come back here when doing auto-assembly and attempting some
1349          * set of devices failed.  Those are now marked as ->used==2 and
1350          * we ignore them and try again
1351          */
1352         if (!st && ident->st)
1353                 st = ident->st;
1354         if (c->verbose>0)
1355                 pr_err("looking for devices for %s\n",
1356                        mddev ? mddev : "further assembly");
1357
1358         content = &info;
1359         if (st && c->force)
1360                 st->ignore_hw_compat = 1;
1361         num_devs = select_devices(devlist, ident, &st, &content, c,
1362                                   inargv, auto_assem);
1363         if (num_devs < 0)
1364                 return 1;
1365
1366         if (!st || !st->sb || !content)
1367                 return 2;
1368
1369         /* We have a full set of devices - we now need to find the
1370          * array device.
1371          * However there is a risk that we are racing with "mdadm -I"
1372          * and the array is already partially assembled - we will have
1373          * rejected any devices already in this address.
1374          * So we take a lock on the map file - to prevent further races -
1375          * and look for the uuid in there.  If found and the array is
1376          * active, we abort.  If found and the array is not active
1377          * we commit to that md device and add all the contained devices
1378          * to our list.  We flag them so that we don't try to re-add,
1379          * but can remove if they turn out to not be wanted.
1380          */
1381         if (map_lock(&map))
1382                 pr_err("failed to get exclusive lock on mapfile - continue anyway...\n");
1383         if (c->update && strcmp(c->update,"uuid") == 0)
1384                 mp = NULL;
1385         else
1386                 mp = map_by_uuid(&map, content->uuid);
1387         if (mp) {
1388                 struct mdinfo *dv;
1389                 /* array already exists. */
1390                 pre_exist = sysfs_read(-1, mp->devnm, GET_LEVEL|GET_DEVS);
1391                 if (pre_exist->array.level != UnSet) {
1392                         pr_err("Found some drive for an array that is already active: %s\n",
1393                                mp->path);
1394                         pr_err("giving up.\n");
1395                         return 1;
1396                 }
1397                 for (dv = pre_exist->devs; dv; dv = dv->next) {
1398                         /* We want to add this device to our list,
1399                          * but it could already be there if "mdadm -I"
1400                          * started *after* we checked for O_EXCL.
1401                          * If we add it to the top of the list
1402                          * it will be preferred over later copies.
1403                          */
1404                         struct mddev_dev *newdev;
1405                         char *devname = map_dev(dv->disk.major,
1406                                                 dv->disk.minor,
1407                                                 0);
1408                         if (!devname)
1409                                 continue;
1410                         newdev = xmalloc(sizeof(*newdev));
1411                         newdev->devname = devname;
1412                         newdev->disposition = 'I';
1413                         newdev->used = 1;
1414                         newdev->next = devlist;
1415                         devlist = newdev;
1416                         num_devs++;
1417                 }
1418                 strcpy(chosen_name, mp->path);
1419                 if (c->verbose > 0 || mddev == NULL ||
1420                     strcmp(mddev, chosen_name) != 0)
1421                         pr_err("Merging with already-assembled %s\n",
1422                                chosen_name);
1423                 mdfd = open_dev_excl(mp->devnm);
1424         } else {
1425                 int trustworthy = FOREIGN;
1426                 name = content->name;
1427                 switch (st->ss->match_home(st, c->homehost)
1428                         ?: st->ss->match_home(st, "any")) {
1429                 case 1:
1430                         trustworthy = LOCAL;
1431                         name = strchr(content->name, ':');
1432                         if (name)
1433                                 name++;
1434                         else
1435                                 name = content->name;
1436                         break;
1437                 }
1438                 if (!auto_assem)
1439                         /* If the array is listed in mdadm.conf or on
1440                          * command line, then we trust the name
1441                          * even if the array doesn't look local
1442                          */
1443                         trustworthy = LOCAL;
1444
1445                 if (name[0] == 0 &&
1446                     content->array.level == LEVEL_CONTAINER) {
1447                         name = content->text_version;
1448                         trustworthy = METADATA;
1449                 }
1450
1451                 if (name[0] && trustworthy != LOCAL &&
1452                     ! c->require_homehost &&
1453                     conf_name_is_free(name))
1454                         trustworthy = LOCAL;
1455
1456                 if (trustworthy == LOCAL &&
1457                     strchr(name, ':'))
1458                         /* Ignore 'host:' prefix of name */
1459                         name = strchr(name, ':')+1;
1460
1461                 mdfd = create_mddev(mddev, name, ident->autof, trustworthy,
1462                                     chosen_name);
1463         }
1464         if (mdfd < 0) {
1465                 st->ss->free_super(st);
1466                 if (auto_assem)
1467                         goto try_again;
1468                 return 1;
1469         }
1470         mddev = chosen_name;
1471         if (get_linux_version() < 2004000 ||
1472             md_get_version(mdfd) < 9000) {
1473                 pr_err("Assemble requires Linux 2.4 or later, and\n"
1474                        "     md driver version 0.90.0 or later.\n"
1475                        "    Upgrade your kernel or try --build\n");
1476                 close(mdfd);
1477                 return 1;
1478         }
1479         if (pre_exist == NULL) {
1480                 if (mddev_busy(fd2devnm(mdfd))) {
1481                         pr_err("%s already active, cannot restart it!\n",
1482                                mddev);
1483                         for (tmpdev = devlist ;
1484                              tmpdev && tmpdev->used != 1;
1485                              tmpdev = tmpdev->next)
1486                                 ;
1487                         if (tmpdev && auto_assem)
1488                                 pr_err("%s needed for %s...\n",
1489                                        mddev, tmpdev->devname);
1490                         close(mdfd);
1491                         mdfd = -3;
1492                         st->ss->free_super(st);
1493                         if (auto_assem)
1494                                 goto try_again;
1495                         return 1;
1496                 }
1497                 /* just incase it was started but has no content */
1498                 ioctl(mdfd, STOP_ARRAY, NULL);
1499         }
1500
1501 #ifndef MDASSEMBLE
1502         if (content != &info) {
1503                 /* This is a member of a container.  Try starting the array. */
1504                 int err;
1505                 err = assemble_container_content(st, mdfd, content, c,
1506                                                  chosen_name, NULL);
1507                 close(mdfd);
1508                 return err;
1509         }
1510 #endif
1511         /* Ok, no bad inconsistancy, we can try updating etc */
1512         devices = xcalloc(num_devs, sizeof(*devices));
1513         devmap = xcalloc(num_devs, content->array.raid_disks);
1514         devcnt = load_devices(devices, devmap, ident, &st, devlist,
1515                               c, content, mdfd, mddev,
1516                               &most_recent, &bestcnt, &best, inargv);
1517         if (devcnt < 0)
1518                 return 1;
1519
1520         if (devcnt == 0) {
1521                 pr_err("no devices found for %s\n",
1522                        mddev);
1523                 if (st)
1524                         st->ss->free_super(st);
1525                 close(mdfd);
1526                 free(devices);
1527                 free(devmap);
1528                 return 1;
1529         }
1530
1531         if (c->update && strcmp(c->update, "byteorder")==0)
1532                 st->minor_version = 90;
1533
1534         st->ss->getinfo_super(st, content, NULL);
1535         clean = content->array.state & 1;
1536
1537         /* now we have some devices that might be suitable.
1538          * I wonder how many
1539          */
1540         avail = xcalloc(content->array.raid_disks, 1);
1541         okcnt = 0;
1542         replcnt = 0;
1543         sparecnt=0;
1544         journalcnt=0;
1545         rebuilding_cnt=0;
1546         for (i=0; i< bestcnt; i++) {
1547                 int j = best[i];
1548                 int event_margin = 1; /* always allow a difference of '1'
1549                                        * like the kernel does
1550                                        */
1551                 if (j < 0) continue;
1552                 /* note: we ignore error flags in multipath arrays
1553                  * as they don't make sense
1554                  */
1555                 if (content->array.level != LEVEL_MULTIPATH) {
1556                         if (devices[j].i.disk.state & (1<<MD_DISK_JOURNAL)) {
1557                                 if (content->journal_device_required)
1558                                         journalcnt++;
1559                                 else    /* unexpected journal, mark as faulty */
1560                                         devices[j].i.disk.state |= (1<<MD_DISK_FAULTY);
1561                         } else if (!(devices[j].i.disk.state & (1<<MD_DISK_ACTIVE))) {
1562                                 if (!(devices[j].i.disk.state
1563                                       & (1<<MD_DISK_FAULTY))) {
1564                                         devices[j].uptodate = 1;
1565                                         sparecnt++;
1566                                 }
1567                                 continue;
1568                         }
1569                 }
1570                 /* If this device thinks that 'most_recent' has failed, then
1571                  * we must reject this device.
1572                  */
1573                 if (j != most_recent && !c->force &&
1574                     content->array.raid_disks > 0 &&
1575                     devices[most_recent].i.disk.raid_disk >= 0 &&
1576                     devmap[j * content->array.raid_disks + devices[most_recent].i.disk.raid_disk] == 0) {
1577                         if (c->verbose > -1)
1578                                 pr_err("ignoring %s as it reports %s as failed\n",
1579                                        devices[j].devname, devices[most_recent].devname);
1580                         best[i] = -1;
1581                         continue;
1582                 }
1583                 /* Require event counter to be same as, or just less than,
1584                  * most recent.  If it is bigger, it must be a stray spare and
1585                  * should be ignored.
1586                  */
1587                 if (devices[j].i.events+event_margin >=
1588                     devices[most_recent].i.events &&
1589                     devices[j].i.events <=
1590                     devices[most_recent].i.events
1591                         ) {
1592                         devices[j].uptodate = 1;
1593                         if (devices[j].i.disk.state & (1<<MD_DISK_JOURNAL))
1594                                 journal_clean = 1;
1595                         if (i < content->array.raid_disks * 2) {
1596                                 if (devices[j].i.recovery_start == MaxSector ||
1597                                     (content->reshape_active &&
1598                                      i >= content->array.raid_disks - content->delta_disks)) {
1599                                         if (!avail[i/2]) {
1600                                                 okcnt++;
1601                                                 avail[i/2]=1;
1602                                         } else
1603                                                 replcnt++;
1604                                 } else
1605                                         rebuilding_cnt++;
1606                         } else if (devices[j].i.disk.raid_disk != MD_DISK_ROLE_JOURNAL)
1607                                 sparecnt++;
1608                 }
1609         }
1610         free(devmap);
1611         if (c->force) {
1612                 int force_ok = force_array(content, devices, best, bestcnt,
1613                                            avail, most_recent, st, c);
1614                 okcnt += force_ok;
1615                 if (force_ok)
1616                         was_forced = 1;
1617         }
1618         /* Now we want to look at the superblock which the kernel will base things on
1619          * and compare the devices that we think are working with the devices that the
1620          * superblock thinks are working.
1621          * If there are differences and --force is given, then update this chosen
1622          * superblock.
1623          */
1624         chosen_drive = -1;
1625         st->ss->free_super(st);
1626         for (i=0; chosen_drive < 0 && i<bestcnt; i+=2) {
1627                 int j = best[i];
1628                 int fd;
1629
1630                 if (j<0)
1631                         continue;
1632                 if (!devices[j].uptodate)
1633                         continue;
1634                 if (devices[j].i.events < devices[most_recent].i.events)
1635                         continue;
1636                 chosen_drive = j;
1637                 if ((fd=dev_open(devices[j].devname,
1638                                  devices[j].included ? O_RDONLY
1639                                  : (O_RDONLY|O_EXCL)))< 0) {
1640                         pr_err("Cannot open %s: %s\n",
1641                                devices[j].devname, strerror(errno));
1642                         close(mdfd);
1643                         free(devices);
1644                         return 1;
1645                 }
1646                 if (st->ss->load_super(st,fd, NULL)) {
1647                         close(fd);
1648                         pr_err("RAID superblock has disappeared from %s\n",
1649                                devices[j].devname);
1650                         close(mdfd);
1651                         free(devices);
1652                         return 1;
1653                 }
1654                 close(fd);
1655         }
1656         if (st->sb == NULL) {
1657                 pr_err("No suitable drives found for %s\n", mddev);
1658                 close(mdfd);
1659                 free(devices);
1660                 return 1;
1661         }
1662         st->ss->getinfo_super(st, content, NULL);
1663 #ifndef MDASSEMBLE
1664         sysfs_init(content, mdfd, NULL);
1665 #endif
1666         /* after reload context, store journal_clean in context */
1667         content->journal_clean = journal_clean;
1668         for (i=0; i<bestcnt; i++) {
1669                 int j = best[i];
1670                 unsigned int desired_state;
1671
1672                 if (devices[j].i.disk.raid_disk == MD_DISK_ROLE_JOURNAL)
1673                         desired_state = (1<<MD_DISK_JOURNAL);
1674                 else if (i >= content->array.raid_disks * 2)
1675                         desired_state = 0;
1676                 else if (i & 1)
1677                         desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_REPLACEMENT);
1678                 else
1679                         desired_state = (1<<MD_DISK_ACTIVE) | (1<<MD_DISK_SYNC);
1680
1681                 if (j<0)
1682                         continue;
1683                 if (!devices[j].uptodate)
1684                         continue;
1685
1686                 devices[j].i.disk.state = desired_state;
1687                 if (!(devices[j].i.array.state & 1))
1688                         clean = 0;
1689
1690                 if (st->ss->update_super(st, &devices[j].i, "assemble", NULL,
1691                                          c->verbose, 0, NULL)) {
1692                         if (c->force) {
1693                                 if (c->verbose >= 0)
1694                                         pr_err("clearing FAULTY flag for device %d in %s for %s\n",
1695                                                j, mddev, devices[j].devname);
1696                                 change = 1;
1697                         } else {
1698                                 if (c->verbose >= -1)
1699                                         pr_err("device %d in %s has wrong state in superblock, but %s seems ok\n",
1700                                                i, mddev, devices[j].devname);
1701                         }
1702                 }
1703 #if 0
1704                 if (!(super.disks[i].i.disk.state & (1 << MD_DISK_FAULTY))) {
1705                         pr_err("devices %d of %s is not marked FAULTY in superblock, but cannot be found\n",
1706                                i, mddev);
1707                 }
1708 #endif
1709         }
1710         if (c->force && !clean &&
1711             !enough(content->array.level, content->array.raid_disks,
1712                     content->array.layout, clean,
1713                     avail)) {
1714                 change += st->ss->update_super(st, content, "force-array",
1715                                                devices[chosen_drive].devname, c->verbose,
1716                                                0, NULL);
1717                 was_forced = 1;
1718                 clean = 1;
1719         }
1720
1721         if (change) {
1722                 int fd;
1723                 fd = dev_open(devices[chosen_drive].devname,
1724                               devices[chosen_drive].included ?
1725                               O_RDWR : (O_RDWR|O_EXCL));
1726                 if (fd < 0) {
1727                         pr_err("Could not open %s for write - cannot Assemble array.\n",
1728                                devices[chosen_drive].devname);
1729                         close(mdfd);
1730                         free(devices);
1731                         return 1;
1732                 }
1733                 if (st->ss->store_super(st, fd)) {
1734                         close(fd);
1735                         pr_err("Could not re-write superblock on %s\n",
1736                                devices[chosen_drive].devname);
1737                         close(mdfd);
1738                         free(devices);
1739                         return 1;
1740                 }
1741                 if (c->verbose >= 0)
1742                         pr_err("Marking array %s as 'clean'\n",
1743                                mddev);
1744                 close(fd);
1745         }
1746
1747         /* If we are in the middle of a reshape we may need to restore saved data
1748          * that was moved aside due to the reshape overwriting live data
1749          * The code of doing this lives in Grow.c
1750          */
1751 #ifndef MDASSEMBLE
1752         if (content->reshape_active &&
1753             !(content->reshape_active & RESHAPE_NO_BACKUP)) {
1754                 int err = 0;
1755                 int *fdlist = xmalloc(sizeof(int)* bestcnt);
1756                 if (c->verbose > 0)
1757                         pr_err("%s has an active reshape - checking if critical section needs to be restored\n",
1758                                chosen_name);
1759                 if (!c->backup_file)
1760                         c->backup_file = locate_backup(content->sys_name);
1761                 enable_fds(bestcnt/2);
1762                 for (i = 0; i < bestcnt/2; i++) {
1763                         int j = best[i*2];
1764                         if (j >= 0) {
1765                                 fdlist[i] = dev_open(devices[j].devname,
1766                                                      devices[j].included
1767                                                      ? O_RDWR : (O_RDWR|O_EXCL));
1768                                 if (fdlist[i] < 0) {
1769                                         pr_err("Could not open %s for write - cannot Assemble array.\n",
1770                                                devices[j].devname);
1771                                         err = 1;
1772                                         break;
1773                                 }
1774                         } else
1775                                 fdlist[i] = -1;
1776                 }
1777                 if (!err) {
1778                         if (st->ss->external && st->ss->recover_backup)
1779                                 err = st->ss->recover_backup(st, content);
1780                         else
1781                                 err = Grow_restart(st, content, fdlist, bestcnt/2,
1782                                                    c->backup_file, c->verbose > 0);
1783                         if (err && c->invalid_backup) {
1784                                 if (c->verbose > 0)
1785                                         pr_err("continuing without restoring backup\n");
1786                                 err = 0;
1787                         }
1788                 }
1789                 while (i>0) {
1790                         i--;
1791                         if (fdlist[i]>=0) close(fdlist[i]);
1792                 }
1793                 free(fdlist);
1794                 if (err) {
1795                         pr_err("Failed to restore critical section for reshape, sorry.\n");
1796                         if (c->backup_file == NULL)
1797                                 cont_err("Possibly you needed to specify the --backup-file\n");
1798                         close(mdfd);
1799                         free(devices);
1800                         return err;
1801                 }
1802         }
1803 #endif
1804
1805         /* Almost ready to actually *do* something */
1806         /* First, fill in the map, so that udev can find our name
1807          * as soon as we become active.
1808          */
1809         if (c->update && strcmp(c->update, "metadata")==0) {
1810                 content->array.major_version = 1;
1811                 content->array.minor_version = 0;
1812                 strcpy(content->text_version, "1.0");
1813         }
1814
1815         map_update(&map, fd2devnm(mdfd), content->text_version,
1816                    content->uuid, chosen_name);
1817
1818         rv = start_array(mdfd, mddev, content,
1819                          st, ident, best, bestcnt,
1820                          chosen_drive, devices, okcnt, sparecnt,
1821                          rebuilding_cnt, journalcnt,
1822                          c,
1823                          clean, avail, start_partial_ok,
1824                          pre_exist != NULL,
1825                          was_forced);
1826         if (rv == 1 && !pre_exist)
1827                 ioctl(mdfd, STOP_ARRAY, NULL);
1828         free(devices);
1829         map_unlock(&map);
1830         if (rv == 0) {
1831                 wait_for(chosen_name, mdfd);
1832                 close(mdfd);
1833                 if (auto_assem) {
1834                         int usecs = 1;
1835                         /* There is a nasty race with 'mdadm --monitor'.
1836                          * If it opens this device before we close it,
1837                          * it gets an incomplete open on which IO
1838                          * doesn't work and the capacity is
1839                          * wrong.
1840                          * If we reopen (to check for layered devices)
1841                          * before --monitor closes, we loose.
1842                          *
1843                          * So: wait upto 1 second for there to be
1844                          * a non-zero capacity.
1845                          */
1846                         while (usecs < 1000) {
1847                                 mdfd = open(mddev, O_RDONLY);
1848                                 if (mdfd >= 0) {
1849                                         unsigned long long size;
1850                                         if (get_dev_size(mdfd, NULL, &size) &&
1851                                             size > 0)
1852                                                 break;
1853                                         close(mdfd);
1854                                 }
1855                                 usleep(usecs);
1856                                 usecs <<= 1;
1857                         }
1858                 }
1859         } else
1860                 close(mdfd);
1861
1862         /* '2' means 'OK, but not started yet' */
1863         return rv == 2 ? 0 : rv;
1864 }
1865
1866 #ifndef MDASSEMBLE
1867 int assemble_container_content(struct supertype *st, int mdfd,
1868                                struct mdinfo *content, struct context *c,
1869                                char *chosen_name, int *result)
1870 {
1871         struct mdinfo *dev, *sra, *dev2;
1872         int working = 0, preexist = 0;
1873         int expansion = 0;
1874         int old_raid_disks;
1875         int start_reshape;
1876         char *avail;
1877         int err;
1878
1879         sysfs_init(content, mdfd, NULL);
1880
1881         sra = sysfs_read(mdfd, NULL, GET_VERSION|GET_DEVS);
1882         if (sra == NULL || strcmp(sra->text_version, content->text_version) != 0) {
1883                 if (content->array.major_version == -1 &&
1884                     content->array.minor_version == -2 &&
1885                     c->readonly &&
1886                     content->text_version[0] == '/')
1887                         content->text_version[0] = '-';
1888                 if (sysfs_set_array(content, md_get_version(mdfd)) != 0) {
1889                         sysfs_free(sra);
1890                         return 1;
1891                 }
1892         }
1893
1894         /* There are two types of reshape: container wide or sub-array specific
1895          * Check if metadata requests blocking container wide reshapes
1896          */
1897         start_reshape = (content->reshape_active &&
1898                          !((content->reshape_active == CONTAINER_RESHAPE) &&
1899                            (content->array.state & (1<<MD_SB_BLOCK_CONTAINER_RESHAPE))));
1900
1901         /* Block subarray here if it is under reshape now
1902          * Do not allow for any changes in this array
1903          */
1904         if (st->ss->external && content->recovery_blocked && start_reshape)
1905                 block_subarray(content);
1906
1907         for (dev2 = sra->devs; dev2; dev2 = dev2->next) {
1908                 for (dev = content->devs; dev; dev = dev->next)
1909                         if (dev2->disk.major == dev->disk.major &&
1910                             dev2->disk.minor == dev->disk.minor)
1911                                 break;
1912                 if (dev)
1913                         continue;
1914                 /* Don't want this one any more */
1915                 if (sysfs_set_str(sra, dev2, "slot", "none") < 0 &&
1916                     errno == EBUSY) {
1917                         pr_err("Cannot remove old device %s: not updating %s\n", dev2->sys_name, sra->sys_name);
1918                         sysfs_free(sra);
1919                         return 1;
1920                 }
1921                 sysfs_set_str(sra, dev2, "state", "remove");
1922         }
1923         old_raid_disks = content->array.raid_disks - content->delta_disks;
1924         avail = xcalloc(content->array.raid_disks, 1);
1925         for (dev = content->devs; dev; dev = dev->next) {
1926                 if (dev->disk.raid_disk >= 0)
1927                         avail[dev->disk.raid_disk] = 1;
1928                 if (sysfs_add_disk(content, dev, 1) == 0) {
1929                         if (dev->disk.raid_disk >= old_raid_disks &&
1930                             content->reshape_active)
1931                                 expansion++;
1932                         else
1933                                 working++;
1934                 } else if (errno == EEXIST)
1935                         preexist++;
1936         }
1937         sysfs_free(sra);
1938         if (working + expansion == 0 && c->runstop <= 0) {
1939                 free(avail);
1940                 return 1;/* Nothing new, don't try to start */
1941         }
1942         map_update(NULL, fd2devnm(mdfd), content->text_version,
1943                    content->uuid, chosen_name);
1944
1945         if (enough(content->array.level, content->array.raid_disks,
1946                    content->array.layout, content->array.state & 1, avail) == 0) {
1947                 if (c->export && result)
1948                         *result |= INCR_NO;
1949                 else if (c->verbose >= 0) {
1950                         pr_err("%s assembled with %d device%s",
1951                                chosen_name, preexist + working,
1952                                preexist + working == 1 ? "":"s");
1953                         if (preexist)
1954                                 fprintf(stderr, " (%d new)", working);
1955                         fprintf(stderr, " but not started\n");
1956                 }
1957                 free(avail);
1958                 return 1;
1959         }
1960         free(avail);
1961
1962         if (c->runstop <= 0 &&
1963             (working + preexist + expansion) <
1964             content->array.working_disks) {
1965                 if (c->export && result)
1966                         *result |= INCR_UNSAFE;
1967                 else if (c->verbose >= 0) {
1968                         pr_err("%s assembled with %d device%s",
1969                                chosen_name, preexist + working,
1970                                preexist + working == 1 ? "":"s");
1971                         if (preexist)
1972                                 fprintf(stderr, " (%d new)", working);
1973                         fprintf(stderr, " but not safe to start\n");
1974                 }
1975                 return 1;
1976         }
1977
1978
1979         if (start_reshape) {
1980                 int spare = content->array.raid_disks + expansion;
1981                 if (restore_backup(st, content,
1982                                    working,
1983                                    spare, &c->backup_file, c->verbose) == 1)
1984                         return 1;
1985
1986                 err = sysfs_set_str(content, NULL,
1987                                     "array_state", "readonly");
1988                 if (err)
1989                         return 1;
1990
1991                 if (st->ss->external) {
1992                         if (!mdmon_running(st->container_devnm))
1993                                 start_mdmon(st->container_devnm);
1994                         ping_monitor(st->container_devnm);
1995                         if (mdmon_running(st->container_devnm) &&
1996                             st->update_tail == NULL)
1997                                 st->update_tail = &st->updates;
1998                 }
1999
2000                 err = Grow_continue(mdfd, st, content, c->backup_file,
2001                                     0, c->freeze_reshape);
2002         } else switch(content->array.level) {
2003                 case LEVEL_LINEAR:
2004                 case LEVEL_MULTIPATH:
2005                 case 0:
2006                         err = sysfs_set_str(content, NULL, "array_state",
2007                                             c->readonly ? "readonly" : "active");
2008                         break;
2009                 default:
2010                         err = sysfs_set_str(content, NULL, "array_state",
2011                                             "readonly");
2012                         /* start mdmon if needed. */
2013                         if (!err) {
2014                                 if (!mdmon_running(st->container_devnm))
2015                                         start_mdmon(st->container_devnm);
2016                                 ping_monitor(st->container_devnm);
2017                         }
2018                         break;
2019                 }
2020         if (!err)
2021                 sysfs_set_safemode(content, content->safe_mode_delay);
2022
2023         /* Block subarray here if it is not reshaped now
2024          * It has be blocked a little later to allow mdmon to switch in
2025          * in to R/W state
2026          */
2027         if (st->ss->external && content->recovery_blocked &&
2028             !start_reshape)
2029                 block_subarray(content);
2030
2031         if (c->export && result) {
2032                 if (err)
2033                         *result |= INCR_NO;
2034                 else
2035                         *result |= INCR_YES;
2036         } else if (c->verbose >= 0) {
2037                 if (err)
2038                         pr_err("array %s now has %d device%s",
2039                                chosen_name, working + preexist,
2040                                working + preexist == 1 ? "":"s");
2041                 else
2042                         pr_err("Started %s with %d device%s",
2043                                chosen_name, working + preexist,
2044                                working + preexist == 1 ? "":"s");
2045                 if (preexist)
2046                         fprintf(stderr, " (%d new)", working);
2047                 if (expansion)
2048                         fprintf(stderr, " ( + %d for expansion)",
2049                                 expansion);
2050                 fprintf(stderr, "\n");
2051         }
2052         if (!err)
2053                 wait_for(chosen_name, mdfd);
2054         return err;
2055         /* FIXME should have an O_EXCL and wait for read-auto */
2056 }
2057 #endif