]> git.neil.brown.name Git - mdadm.git/blob - super1.c
Discard devnum in favour of devnm
[mdadm.git] / super1.c
1 /*
2  * mdadm - manage Linux "md" devices aka RAID arrays.
3  *
4  * Copyright (C) 2001-2009 Neil Brown <neilb@suse.de>
5  *
6  *
7  *    This program is free software; you can redistribute it and/or modify
8  *    it under the terms of the GNU General Public License as published by
9  *    the Free Software Foundation; either version 2 of the License, or
10  *    (at your option) any later version.
11  *
12  *    This program is distributed in the hope that it will be useful,
13  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *    GNU General Public License for more details.
16  *
17  *    You should have received a copy of the GNU General Public License
18  *    along with this program; if not, write to the Free Software
19  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  *
21  *    Author: Neil Brown
22  *    Email: <neilb@suse.de>
23  */
24
25 #include "mdadm.h"
26 /*
27  * The version-1 superblock :
28  * All numeric fields are little-endian.
29  *
30  * total size: 256 bytes plus 2 per device.
31  *  1K allows 384 devices.
32  */
33 struct mdp_superblock_1 {
34         /* constant array information - 128 bytes */
35         __u32   magic;          /* MD_SB_MAGIC: 0xa92b4efc - little endian */
36         __u32   major_version;  /* 1 */
37         __u32   feature_map;    /* 0 for now */
38         __u32   pad0;           /* always set to 0 when writing */
39
40         __u8    set_uuid[16];   /* user-space generated. */
41         char    set_name[32];   /* set and interpreted by user-space */
42
43         __u64   ctime;          /* lo 40 bits are seconds, top 24 are microseconds or 0*/
44         __u32   level;          /* -4 (multipath), -1 (linear), 0,1,4,5 */
45         __u32   layout;         /* only for raid5 currently */
46         __u64   size;           /* used size of component devices, in 512byte sectors */
47
48         __u32   chunksize;      /* in 512byte sectors */
49         __u32   raid_disks;
50         __u32   bitmap_offset;  /* sectors after start of superblock that bitmap starts
51                                  * NOTE: signed, so bitmap can be before superblock
52                                  * only meaningful of feature_map[0] is set.
53                                  */
54
55         /* These are only valid with feature bit '4' */
56         __u32   new_level;      /* new level we are reshaping to                */
57         __u64   reshape_position;       /* next address in array-space for reshape */
58         __u32   delta_disks;    /* change in number of raid_disks               */
59         __u32   new_layout;     /* new layout                                   */
60         __u32   new_chunk;      /* new chunk size (bytes)                       */
61         __u32   new_offset;     /* signed number to add to data_offset in new
62                                  * layout.  0 == no-change.  This can be
63                                  * different on each device in the array.
64                                  */
65
66         /* constant this-device information - 64 bytes */
67         __u64   data_offset;    /* sector start of data, often 0 */
68         __u64   data_size;      /* sectors in this device that can be used for data */
69         __u64   super_offset;   /* sector start of this superblock */
70         __u64   recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
71         __u32   dev_number;     /* permanent identifier of this  device - not role in raid */
72         __u32   cnt_corrected_read; /* number of read errors that were corrected by re-writing */
73         __u8    device_uuid[16]; /* user-space setable, ignored by kernel */
74         __u8    devflags;        /* per-device flags.  Only one defined...*/
75 #define WriteMostly1    1        /* mask for writemostly flag in above */
76         /* bad block log.  If there are any bad blocks the feature flag is set.
77          * if offset and size are non-zero, that space is reserved and available.
78          */
79         __u8    bblog_shift;    /* shift from sectors to block size for badblocklist */
80         __u16   bblog_size;     /* number of sectors reserved for badblocklist */
81         __u32   bblog_offset;   /* sector offset from superblock to bblog, signed */
82
83         /* array state information - 64 bytes */
84         __u64   utime;          /* 40 bits second, 24 btes microseconds */
85         __u64   events;         /* incremented when superblock updated */
86         __u64   resync_offset;  /* data before this offset (from data_offset) known to be in sync */
87         __u32   sb_csum;        /* checksum upto dev_roles[max_dev] */
88         __u32   max_dev;        /* size of dev_roles[] array to consider */
89         __u8    pad3[64-32];    /* set to 0 when writing */
90
91         /* device state information. Indexed by dev_number.
92          * 2 bytes per device
93          * Note there are no per-device state flags. State information is rolled
94          * into the 'roles' value.  If a device is spare or faulty, then it doesn't
95          * have a meaningful role.
96          */
97         __u16   dev_roles[0];   /* role in array, or 0xffff for a spare, or 0xfffe for faulty */
98 };
99
100 #define MAX_SB_SIZE 4096
101 /* bitmap super size is 256, but we round up to a sector for alignment */
102 #define BM_SUPER_SIZE 512
103 #define MAX_DEVS ((int)(MAX_SB_SIZE - sizeof(struct mdp_superblock_1)) / 2)
104 #define SUPER1_SIZE     (MAX_SB_SIZE + BM_SUPER_SIZE \
105                          + sizeof(struct misc_dev_info))
106
107 struct misc_dev_info {
108         __u64 device_size;
109 };
110
/*
 * Bits of mdp_superblock_1.feature_map, one bit per feature.
 */
/* bitmap_offset is in use */
#define MD_FEATURE_BITMAP_OFFSET	(1 << 0)
/* recovery_offset is present and must be honoured */
#define MD_FEATURE_RECOVERY_OFFSET	(1 << 1)
/* the reshape fields (new_level, reshape_position, ...) are valid */
#define MD_FEATURE_RESHAPE_ACTIVE	(1 << 2)
/* badblock list is not empty */
#define MD_FEATURE_BAD_BLOCKS		(1 << 3)
/* This device is replacing an active device with same 'role'.
 * 'recovery_offset' is also set.
 */
#define MD_FEATURE_REPLACEMENT		(1 << 4)
/* Reshape doesn't change number of devices, but is going
 * backwards anyway.
 */
#define MD_FEATURE_RESHAPE_BACKWARDS	(1 << 5)
/* new_offset must be honoured */
#define MD_FEATURE_NEW_OFFSET		(1 << 6)

/* Union of every feature bit defined above. */
#define MD_FEATURE_ALL	(MD_FEATURE_BITMAP_OFFSET | \
			 MD_FEATURE_RECOVERY_OFFSET | \
			 MD_FEATURE_RESHAPE_ACTIVE | \
			 MD_FEATURE_BAD_BLOCKS | \
			 MD_FEATURE_REPLACEMENT | \
			 MD_FEATURE_RESHAPE_BACKWARDS | \
			 MD_FEATURE_NEW_OFFSET)
135
136 #ifndef offsetof
137 #define offsetof(t,f) ((size_t)&(((t*)0)->f))
138 #endif
139 static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
140 {
141         unsigned int disk_csum, csum;
142         unsigned long long newcsum;
143         int size = sizeof(*sb) + __le32_to_cpu(sb->max_dev)*2;
144         unsigned int *isuper = (unsigned int*)sb;
145
146 /* make sure I can count... */
147         if (offsetof(struct mdp_superblock_1,data_offset) != 128 ||
148             offsetof(struct mdp_superblock_1, utime) != 192 ||
149             sizeof(struct mdp_superblock_1) != 256) {
150                 fprintf(stderr, "WARNING - superblock isn't sized correctly\n");
151         }
152
153         disk_csum = sb->sb_csum;
154         sb->sb_csum = 0;
155         newcsum = 0;
156         for (; size>=4; size -= 4 ) {
157                 newcsum += __le32_to_cpu(*isuper);
158                 isuper++;
159         }
160
161         if (size == 2)
162                 newcsum += __le16_to_cpu(*(unsigned short*) isuper);
163
164         csum = (newcsum & 0xffffffff) + (newcsum >> 32);
165         sb->sb_csum = disk_csum;
166         return __cpu_to_le32(csum);
167 }
168
/*
 * A file descriptor together with the block size to use for the
 * aligned reads/writes done through it, so that the size only has to
 * be looked up once.
 */
struct align_fd {
	int fd;
	int blk_sz;
};

/*
 * Set up 'afd' for 'fd'.  The block size comes from the BLKSSZGET
 * ioctl; when that ioctl fails 512 is used instead.
 */
static void init_afd(struct align_fd *afd, int fd)
{
	int sector_size;

	afd->fd = fd;

	if (ioctl(fd, BLKSSZGET, &sector_size) == 0)
		afd->blk_sz = sector_size;
	else
		afd->blk_sz = 512;
}
185
186 static char abuf[4096+4096];
187 static int aread(struct align_fd *afd, void *buf, int len)
188 {
189         /* aligned read.
190          * On devices with a 4K sector size, we need to read
191          * the full sector and copy relevant bits into
192          * the buffer
193          */
194         int bsize, iosize;
195         char *b;
196         int n;
197
198         bsize = afd->blk_sz;
199
200         if (!bsize || bsize > 4096 || len > 4096) {
201                 if (!bsize)
202                         fprintf(stderr, "WARNING - aread() called with "
203                                 "invalid block size\n");
204                 return -1;
205         }
206         b = ROUND_UP_PTR((char *)abuf, 4096);
207
208         for (iosize = 0; iosize < len; iosize += bsize)
209                 ;
210         n = read(afd->fd, b, iosize);
211         if (n <= 0)
212                 return n;
213         lseek(afd->fd, len - n, 1);
214         if (n > len)
215                 n = len;
216         memcpy(buf, b, n);
217         return n;
218 }
219
220 static int awrite(struct align_fd *afd, void *buf, int len)
221 {
222         /* aligned write.
223          * On devices with a 4K sector size, we need to write
224          * the full sector.  We pre-read if the sector is larger
225          * than the write.
226          * The address must be sector-aligned.
227          */
228         int bsize, iosize;
229         char *b;
230         int n;
231
232         bsize = afd->blk_sz;
233         if (!bsize || bsize > 4096 || len > 4096) {
234                 if (!bsize)
235                         fprintf(stderr, "WARNING - awrite() called with "
236                                 "invalid block size\n");
237                 return -1;
238         }
239         b = ROUND_UP_PTR((char *)abuf, 4096);
240
241         for (iosize = 0; iosize < len ; iosize += bsize)
242                 ;
243
244         if (len != iosize) {
245                 n = read(afd->fd, b, iosize);
246                 if (n <= 0)
247                         return n;
248                 lseek(afd->fd, -n, 1);
249         }
250
251         memcpy(b, buf, len);
252         n = write(afd->fd, b, iosize);
253         if (n <= 0)
254                 return n;
255         lseek(afd->fd, len - n, 1);
256         return len;
257 }
258
259 #ifndef MDASSEMBLE
260 static void examine_super1(struct supertype *st, char *homehost)
261 {
262         struct mdp_superblock_1 *sb = st->sb;
263         time_t atime;
264         unsigned int d;
265         int role;
266         int delta_extra = 0;
267         int i;
268         char *c;
269         int l = homehost ? strlen(homehost) : 0;
270         int layout;
271         unsigned long long sb_offset;
272
273         printf("          Magic : %08x\n", __le32_to_cpu(sb->magic));
274         printf("        Version : 1");
275         sb_offset = __le64_to_cpu(sb->super_offset);
276         if (sb_offset <= 4)
277                 printf(".1\n");
278         else if (sb_offset <= 8)
279                 printf(".2\n");
280         else
281                 printf(".0\n");
282         printf("    Feature Map : 0x%x\n", __le32_to_cpu(sb->feature_map));
283         printf("     Array UUID : ");
284         for (i=0; i<16; i++) {
285                 if ((i&3)==0 && i != 0) printf(":");
286                 printf("%02x", sb->set_uuid[i]);
287         }
288         printf("\n");
289         printf("           Name : %.32s", sb->set_name);
290         if (l > 0 && l < 32 &&
291             sb->set_name[l] == ':' &&
292             strncmp(sb->set_name, homehost, l) == 0)
293                 printf("  (local to host %s)", homehost);
294         printf("\n");
295         atime = __le64_to_cpu(sb->ctime) & 0xFFFFFFFFFFULL;
296         printf("  Creation Time : %.24s\n", ctime(&atime));
297         c=map_num(pers, __le32_to_cpu(sb->level));
298         printf("     Raid Level : %s\n", c?c:"-unknown-");
299         printf("   Raid Devices : %d\n", __le32_to_cpu(sb->raid_disks));
300         printf("\n");
301         printf(" Avail Dev Size : %llu%s\n",
302                (unsigned long long)__le64_to_cpu(sb->data_size),
303                human_size(__le64_to_cpu(sb->data_size)<<9));
304         if (__le32_to_cpu(sb->level) > 0) {
305                 int ddsks = 0, ddsks_denom = 1;
306                 switch(__le32_to_cpu(sb->level)) {
307                 case 1: ddsks=1;break;
308                 case 4:
309                 case 5: ddsks = __le32_to_cpu(sb->raid_disks)-1; break;
310                 case 6: ddsks = __le32_to_cpu(sb->raid_disks)-2; break;
311                 case 10:
312                         layout = __le32_to_cpu(sb->layout);
313                         ddsks = __le32_to_cpu(sb->raid_disks);
314                         ddsks_denom = (layout&255) * ((layout>>8)&255);
315                 }
316                 if (ddsks) {
317                         long long asize = __le64_to_cpu(sb->size);
318                         asize = (asize << 9) * ddsks / ddsks_denom;
319                         printf("     Array Size : %llu%s\n",
320                                asize >> 10,  human_size(asize));
321                 }
322                 if (sb->size != sb->data_size)
323                         printf("  Used Dev Size : %llu%s\n",
324                                (unsigned long long)__le64_to_cpu(sb->size),
325                                human_size(__le64_to_cpu(sb->size)<<9));
326         }
327         if (sb->data_offset)
328                 printf("    Data Offset : %llu sectors\n",
329                        (unsigned long long)__le64_to_cpu(sb->data_offset));
330         if (sb->new_offset) {
331                 unsigned long long offset = __le64_to_cpu(sb->data_offset);
332                 offset += (signed)(int32_t)__le32_to_cpu(sb->new_offset);
333                 printf("     New Offset : %llu sectors\n", offset);
334         }
335         printf("   Super Offset : %llu sectors\n",
336                (unsigned long long)__le64_to_cpu(sb->super_offset));
337         if (__le32_to_cpu(sb->feature_map) & MD_FEATURE_RECOVERY_OFFSET)
338                 printf("Recovery Offset : %llu sectors\n", (unsigned long long)__le64_to_cpu(sb->recovery_offset));
339         printf("          State : %s\n", (__le64_to_cpu(sb->resync_offset)+1)? "active":"clean");
340         printf("    Device UUID : ");
341         for (i=0; i<16; i++) {
342                 if ((i&3)==0 && i != 0) printf(":");
343                 printf("%02x", sb->device_uuid[i]);
344         }
345         printf("\n");
346         printf("\n");
347         if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
348                 printf("Internal Bitmap : %ld sectors from superblock\n",
349                        (long)(int32_t)__le32_to_cpu(sb->bitmap_offset));
350         }
351         if (sb->feature_map & __cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE)) {
352                 printf("  Reshape pos'n : %llu%s\n", (unsigned long long)__le64_to_cpu(sb->reshape_position)/2,
353                        human_size(__le64_to_cpu(sb->reshape_position)<<9));
354                 if (__le32_to_cpu(sb->delta_disks)) {
355                         printf("  Delta Devices : %d", __le32_to_cpu(sb->delta_disks));
356                         printf(" (%d->%d)\n",
357                                __le32_to_cpu(sb->raid_disks)-__le32_to_cpu(sb->delta_disks),
358                                __le32_to_cpu(sb->raid_disks));
359                         if ((int)__le32_to_cpu(sb->delta_disks) < 0)
360                                 delta_extra = -__le32_to_cpu(sb->delta_disks);
361                 }
362                 if (__le32_to_cpu(sb->new_level) != __le32_to_cpu(sb->level)) {
363                         c = map_num(pers, __le32_to_cpu(sb->new_level));
364                         printf("      New Level : %s\n", c?c:"-unknown-");
365                 }
366                 if (__le32_to_cpu(sb->new_layout) != __le32_to_cpu(sb->layout)) {
367                         if (__le32_to_cpu(sb->level) == 5) {
368                                 c = map_num(r5layout, __le32_to_cpu(sb->new_layout));
369                                 printf("     New Layout : %s\n", c?c:"-unknown-");
370                         }
371                         if (__le32_to_cpu(sb->level) == 6) {
372                                 c = map_num(r6layout, __le32_to_cpu(sb->new_layout));
373                                 printf("     New Layout : %s\n", c?c:"-unknown-");
374                         }
375                         if (__le32_to_cpu(sb->level) == 10) {
376                                 printf("     New Layout :");
377                                 print_r10_layout(__le32_to_cpu(sb->new_layout));
378                                 printf("\n");
379                         }
380                 }
381                 if (__le32_to_cpu(sb->new_chunk) != __le32_to_cpu(sb->chunksize))
382                         printf("  New Chunksize : %dK\n", __le32_to_cpu(sb->new_chunk)/2);
383                 printf("\n");
384         }
385         if (sb->devflags) {
386                 printf("          Flags :");
387                 if (sb->devflags & WriteMostly1)
388                         printf(" write-mostly");
389                 printf("\n");
390         }
391
392         atime = __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL;
393         printf("    Update Time : %.24s\n", ctime(&atime));
394
395         if (sb->bblog_size && sb->bblog_offset) {
396                 printf("  Bad Block Log : %d entries available at offset %ld sectors",
397                        __le16_to_cpu(sb->bblog_size)*512/8,
398                        (long)__le32_to_cpu(sb->bblog_offset));
399                 if (sb->feature_map &
400                     __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
401                         printf(" - bad blocks present.");
402                 printf("\n");
403         }
404
405
406         if (calc_sb_1_csum(sb) == sb->sb_csum)
407                 printf("       Checksum : %x - correct\n", __le32_to_cpu(sb->sb_csum));
408         else
409                 printf("       Checksum : %x - expected %x\n", __le32_to_cpu(sb->sb_csum),
410                        __le32_to_cpu(calc_sb_1_csum(sb)));
411         printf("         Events : %llu\n", (unsigned long long)__le64_to_cpu(sb->events));
412         printf("\n");
413         if (__le32_to_cpu(sb->level) == 5) {
414                 c = map_num(r5layout, __le32_to_cpu(sb->layout));
415                 printf("         Layout : %s\n", c?c:"-unknown-");
416         }
417         if (__le32_to_cpu(sb->level) == 6) {
418                 c = map_num(r6layout, __le32_to_cpu(sb->layout));
419                 printf("         Layout : %s\n", c?c:"-unknown-");
420         }
421         if (__le32_to_cpu(sb->level) == 10) {
422                 int lo = __le32_to_cpu(sb->layout);
423                 printf("         Layout :");
424                 print_r10_layout(lo);
425                 printf("\n");
426         }
427         switch(__le32_to_cpu(sb->level)) {
428         case 0:
429         case 4:
430         case 5:
431         case 6:
432         case 10:
433                 printf("     Chunk Size : %dK\n", __le32_to_cpu(sb->chunksize)/2);
434                 break;
435         case -1:
436                 printf("       Rounding : %dK\n", __le32_to_cpu(sb->chunksize)/2);
437                 break;
438         default: break;
439         }
440         printf("\n");
441 #if 0
442         /* This turns out to just be confusing */
443         printf("    Array Slot : %d (", __le32_to_cpu(sb->dev_number));
444         for (i= __le32_to_cpu(sb->max_dev); i> 0 ; i--)
445                 if (__le16_to_cpu(sb->dev_roles[i-1]) != 0xffff)
446                         break;
447         for (d=0; d < i; d++) {
448                 int role = __le16_to_cpu(sb->dev_roles[d]);
449                 if (d) printf(", ");
450                 if (role == 0xffff) printf("empty");
451                 else if(role == 0xfffe) printf("failed");
452                 else printf("%d", role);
453         }
454         printf(")\n");
455 #endif
456         printf("   Device Role : ");
457         d = __le32_to_cpu(sb->dev_number);
458         if (d < __le32_to_cpu(sb->max_dev))
459                 role = __le16_to_cpu(sb->dev_roles[d]);
460         else
461                 role = 0xFFFF;
462         if (role >= 0xFFFE)
463                 printf("spare\n");
464         else if (sb->feature_map & __cpu_to_le32(MD_FEATURE_REPLACEMENT))
465                 printf("Replacement device %d\n", role);
466         else
467                 printf("Active device %d\n", role);
468
469         printf("   Array State : ");
470         for (d=0; d<__le32_to_cpu(sb->raid_disks) + delta_extra; d++) {
471                 int cnt = 0;
472                 unsigned int i;
473                 for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
474                         unsigned int role = __le16_to_cpu(sb->dev_roles[i]);
475                         if (role == d)
476                                 cnt++;
477                 }
478                 if (cnt == 2)
479                         printf("R");
480                 else if (cnt == 1)
481                         printf("A");
482                 else if (cnt == 0)
483                         printf(".");
484                 else
485                         printf("?");
486         }
487 #if 0
488         /* This is confusing too */
489         faulty = 0;
490         for (i=0; i< __le32_to_cpu(sb->max_dev); i++) {
491                 int role = __le16_to_cpu(sb->dev_roles[i]);
492                 if (role == 0xFFFE)
493                         faulty++;
494         }
495         if (faulty) printf(" %d failed", faulty);
496 #endif
497         printf(" ('A' == active, '.' == missing, 'R' == replacing)");
498         printf("\n");
499 }
500
501
502 static void brief_examine_super1(struct supertype *st, int verbose)
503 {
504         struct mdp_superblock_1 *sb = st->sb;
505         int i;
506         unsigned long long sb_offset;
507         char *nm;
508         char *c=map_num(pers, __le32_to_cpu(sb->level));
509
510         nm = strchr(sb->set_name, ':');
511         if (nm)
512                 nm++;
513         else if (sb->set_name[0])
514                 nm = sb->set_name;
515         else
516                 nm = NULL;
517
518         printf("ARRAY ");
519         if (nm) {
520                 printf("/dev/md/");
521                 print_escape(nm);
522                 putchar(' ');
523         }
524         if (verbose && c)
525                 printf(" level=%s", c);
526         sb_offset = __le64_to_cpu(sb->super_offset);
527         if (sb_offset <= 4)
528                 printf(" metadata=1.1 ");
529         else if (sb_offset <= 8)
530                 printf(" metadata=1.2 ");
531         else
532                 printf(" metadata=1.0 ");
533         if (verbose)
534                 printf("num-devices=%d ", __le32_to_cpu(sb->raid_disks));
535         printf("UUID=");
536         for (i=0; i<16; i++) {
537                 if ((i&3)==0 && i != 0) printf(":");
538                 printf("%02x", sb->set_uuid[i]);
539         }
540         if (sb->set_name[0]) {
541                 printf(" name=");
542                 print_quoted(sb->set_name);
543         }
544         printf("\n");
545 }
546
547 static void export_examine_super1(struct supertype *st)
548 {
549         struct mdp_superblock_1 *sb = st->sb;
550         int i;
551         int len = 32;
552         int layout;
553
554         printf("MD_LEVEL=%s\n", map_num(pers, __le32_to_cpu(sb->level)));
555         printf("MD_DEVICES=%d\n", __le32_to_cpu(sb->raid_disks));
556         for (i=0; i<32; i++)
557                 if (sb->set_name[i] == '\n' ||
558                     sb->set_name[i] == '\0') {
559                         len = i;
560                         break;
561                 }
562         if (len)
563                 printf("MD_NAME=%.*s\n", len, sb->set_name);
564         if (__le32_to_cpu(sb->level) > 0) {
565                 int ddsks = 0, ddsks_denom = 1;
566                 switch(__le32_to_cpu(sb->level)) {
567                         case 1: ddsks=1;break;
568                         case 4:
569                         case 5: ddsks = __le32_to_cpu(sb->raid_disks)-1; break;
570                         case 6: ddsks = __le32_to_cpu(sb->raid_disks)-2; break;
571                         case 10:
572                                 layout = __le32_to_cpu(sb->layout);
573                                 ddsks = __le32_to_cpu(sb->raid_disks);
574                                 ddsks_denom = (layout&255) * ((layout>>8)&255);
575                         }
576                 if (ddsks) {
577                         long long asize = __le64_to_cpu(sb->size);
578                         asize = (asize << 9) * ddsks / ddsks_denom;
579                         printf("MD_ARRAY_SIZE=%s\n",human_size_brief(asize,JEDEC));
580                 }
581         }
582         printf("MD_UUID=");
583         for (i=0; i<16; i++) {
584                 if ((i&3)==0 && i != 0) printf(":");
585                 printf("%02x", sb->set_uuid[i]);
586         }
587         printf("\n");
588         printf("MD_UPDATE_TIME=%llu\n",
589                __le64_to_cpu(sb->utime) & 0xFFFFFFFFFFULL);
590         printf("MD_DEV_UUID=");
591         for (i=0; i<16; i++) {
592                 if ((i&3)==0 && i != 0) printf(":");
593                 printf("%02x", sb->device_uuid[i]);
594         }
595         printf("\n");
596         printf("MD_EVENTS=%llu\n",
597                (unsigned long long)__le64_to_cpu(sb->events));
598 }
599
600 static void detail_super1(struct supertype *st, char *homehost)
601 {
602         struct mdp_superblock_1 *sb = st->sb;
603         int i;
604         int l = homehost ? strlen(homehost) : 0;
605
606         printf("           Name : %.32s", sb->set_name);
607         if (l > 0 && l < 32 &&
608             sb->set_name[l] == ':' &&
609             strncmp(sb->set_name, homehost, l) == 0)
610                 printf("  (local to host %s)", homehost);
611         printf("\n           UUID : ");
612         for (i=0; i<16; i++) {
613                 if ((i&3)==0 && i != 0) printf(":");
614                 printf("%02x", sb->set_uuid[i]);
615         }
616         printf("\n         Events : %llu\n\n", (unsigned long long)__le64_to_cpu(sb->events));
617 }
618
619 static void brief_detail_super1(struct supertype *st)
620 {
621         struct mdp_superblock_1 *sb = st->sb;
622         int i;
623
624         if (sb->set_name[0]) {
625                 printf(" name=");
626                 print_quoted(sb->set_name);
627         }
628         printf(" UUID=");
629         for (i=0; i<16; i++) {
630                 if ((i&3)==0 && i != 0) printf(":");
631                 printf("%02x", sb->set_uuid[i]);
632         }
633 }
634
635 static void export_detail_super1(struct supertype *st)
636 {
637         struct mdp_superblock_1 *sb = st->sb;
638         int i;
639         int len = 32;
640
641         for (i=0; i<32; i++)
642                 if (sb->set_name[i] == '\n' ||
643                     sb->set_name[i] == '\0') {
644                         len = i;
645                         break;
646                 }
647         if (len)
648                 printf("MD_NAME=%.*s\n", len, sb->set_name);
649 }
650
651 static int examine_badblocks_super1(struct supertype *st, int fd, char *devname)
652 {
653         struct mdp_superblock_1 *sb = st->sb;
654         unsigned long long offset;
655         int size;
656         __u64 *bbl, *bbp;
657         int i;
658
659         if  (!sb->bblog_size || __le32_to_cpu(sb->bblog_size) > 100
660              || !sb->bblog_offset){
661                 printf("No bad-blocks list configured on %s\n", devname);
662                 return 0;
663         }
664         if ((sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
665             == 0) {
666                 printf("Bad-blocks list is empty in %s\n", devname);
667                 return 0;
668         }
669
670         size = __le32_to_cpu(sb->bblog_size)* 512;
671         posix_memalign((void**)&bbl, 4096, size);
672         offset = __le64_to_cpu(sb->super_offset) +
673                 (int)__le32_to_cpu(sb->bblog_offset);
674         offset <<= 9;
675         if (lseek64(fd, offset, 0) < 0) {
676                 pr_err("Cannot seek to bad-blocks list\n");
677                 return 1;
678         }
679         if (read(fd, bbl, size) != size) {
680                 pr_err("Cannot read bad-blocks list\n");
681                 return 1;
682         }
683         /* 64bits per entry. 10 bits is block-count, 54 bits is block
684          * offset.  Blocks are sectors unless bblog->shift makes them bigger
685          */
686         bbp = (__u64*)bbl;
687         printf("Bad-blocks on %s:\n", devname);
688         for (i = 0; i < size/8; i++, bbp++) {
689                 __u64 bb = __le64_to_cpu(*bbp);
690                 int count = bb & 0x3ff;
691                 unsigned long long sector = bb >> 10;
692
693                 if (bb + 1 == 0)
694                         break;
695
696                 sector <<= sb->bblog_shift;
697                 count <<= sb->bblog_shift;
698
699                 printf("%20llu for %d sectors\n", sector, count);
700         }
701         return 0;
702 }
703
704 #endif
705
706 static int match_home1(struct supertype *st, char *homehost)
707 {
708         struct mdp_superblock_1 *sb = st->sb;
709         int l = homehost ? strlen(homehost) : 0;
710
711         return (l > 0 && l < 32 &&
712                 sb->set_name[l] == ':' &&
713                 strncmp(sb->set_name, homehost, l) == 0);
714 }
715
716 static void uuid_from_super1(struct supertype *st, int uuid[4])
717 {
718         struct mdp_superblock_1 *super = st->sb;
719         char *cuuid = (char*)uuid;
720         int i;
721         for (i=0; i<16; i++)
722                 cuuid[i] = super->set_uuid[i];
723 }
724
725 static void getinfo_super1(struct supertype *st, struct mdinfo *info, char *map)
726 {
727         struct mdp_superblock_1 *sb = st->sb;
728         struct bitmap_super_s *bsb = (void*)(((char*)sb)+MAX_SB_SIZE);
729         struct misc_dev_info *misc = (void*)(((char*)sb)+MAX_SB_SIZE+BM_SUPER_SIZE);
730         int working = 0;
731         unsigned int i;
732         unsigned int role;
733         unsigned int map_disks = info->array.raid_disks;
734         unsigned long long super_offset;
735         unsigned long long data_size;
736
737         memset(info, 0, sizeof(*info));
738         info->array.major_version = 1;
739         info->array.minor_version = st->minor_version;
740         info->array.patch_version = 0;
741         info->array.raid_disks = __le32_to_cpu(sb->raid_disks);
742         info->array.level = __le32_to_cpu(sb->level);
743         info->array.layout = __le32_to_cpu(sb->layout);
744         info->array.md_minor = -1;
745         info->array.ctime = __le64_to_cpu(sb->ctime);
746         info->array.utime = __le64_to_cpu(sb->utime);
747         info->array.chunk_size = __le32_to_cpu(sb->chunksize)*512;
748         info->array.state =
749                 (__le64_to_cpu(sb->resync_offset) == MaxSector)
750                 ? 1 : 0;
751
752         info->data_offset = __le64_to_cpu(sb->data_offset);
753         info->component_size = __le64_to_cpu(sb->size);
754         if (sb->feature_map & __le32_to_cpu(MD_FEATURE_BITMAP_OFFSET))
755                 info->bitmap_offset = (int32_t)__le32_to_cpu(sb->bitmap_offset);
756
757         info->disk.major = 0;
758         info->disk.minor = 0;
759         info->disk.number = __le32_to_cpu(sb->dev_number);
760         if (__le32_to_cpu(sb->dev_number) >= __le32_to_cpu(sb->max_dev) ||
761             __le32_to_cpu(sb->dev_number) >= MAX_DEVS)
762                 role = 0xfffe;
763         else
764                 role = __le16_to_cpu(sb->dev_roles[__le32_to_cpu(sb->dev_number)]);
765
766         super_offset = __le64_to_cpu(sb->super_offset);
767         data_size = __le64_to_cpu(sb->size);
768         if (info->data_offset < super_offset) {
769                 unsigned long long end;
770                 info->space_before = info->data_offset;
771                 end = super_offset;
772                 if (info->bitmap_offset < 0)
773                         end += info->bitmap_offset;
774                 if (info->data_offset + data_size < end)
775                         info->space_after = end - data_size - info->data_offset;
776                 else
777                         info->space_after = 0;
778         } else {
779                 info->space_before = (info->data_offset -
780                                       super_offset);
781                 if (info->bitmap_offset > 0) {
782                         unsigned long long bmend = info->bitmap_offset;
783                         unsigned long long size = __le64_to_cpu(bsb->sync_size);
784                         size /= __le32_to_cpu(bsb->chunksize) >> 9;
785                         size = (size + 7) >> 3;
786                         size += sizeof(bitmap_super_t);
787                         size = ROUND_UP(size, 4096);
788                         size /= 512;
789                         size += bmend;
790                         if (size < info->space_before)
791                                 info->space_before -= size;
792                         else
793                                 info->space_before = 0;
794                 } else
795                         info->space_before -= 8; /* superblock */
796                 info->space_after = misc->device_size - data_size - info->data_offset;
797         }
798
799         info->disk.raid_disk = -1;
800         switch(role) {
801         case 0xFFFF:
802                 info->disk.state = 0; /* spare: not active, not sync, not faulty */
803                 break;
804         case 0xFFFE:
805                 info->disk.state = 1; /* faulty */
806                 break;
807         default:
808                 info->disk.state = 6; /* active and in sync */
809                 info->disk.raid_disk = role;
810         }
811         if (sb->devflags & WriteMostly1)
812                 info->disk.state |= (1 << MD_DISK_WRITEMOSTLY);
813         info->events = __le64_to_cpu(sb->events);
814         sprintf(info->text_version, "1.%d", st->minor_version);
815         info->safe_mode_delay = 200;
816
817         memcpy(info->uuid, sb->set_uuid, 16);
818
819         strncpy(info->name, sb->set_name, 32);
820         info->name[32] = 0;
821
822         if ((__le32_to_cpu(sb->feature_map)&MD_FEATURE_REPLACEMENT)) {
823                 info->disk.state &= ~(1 << MD_DISK_SYNC);
824                 info->disk.state |=  1 << MD_DISK_REPLACEMENT;
825         }
826
827
828         if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RECOVERY_OFFSET))
829                 info->recovery_start = __le32_to_cpu(sb->recovery_offset);
830         else
831                 info->recovery_start = MaxSector;
832
833         if (sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE)) {
834                 info->reshape_active = 1;
835                 if (info->array.level == 10)
836                         info->reshape_active |= RESHAPE_NO_BACKUP;
837                 info->reshape_progress = __le64_to_cpu(sb->reshape_position);
838                 info->new_level = __le32_to_cpu(sb->new_level);
839                 info->delta_disks = __le32_to_cpu(sb->delta_disks);
840                 info->new_layout = __le32_to_cpu(sb->new_layout);
841                 info->new_chunk = __le32_to_cpu(sb->new_chunk)<<9;
842                 if (info->delta_disks < 0)
843                         info->array.raid_disks -= info->delta_disks;
844         } else
845                 info->reshape_active = 0;
846
847         info->recovery_blocked = info->reshape_active;
848
849         if (map)
850                 for (i=0; i<map_disks; i++)
851                         map[i] = 0;
852         for (i = 0; i < __le32_to_cpu(sb->max_dev); i++) {
853                 role = __le16_to_cpu(sb->dev_roles[i]);
854                 if (/*role == 0xFFFF || */role < (unsigned) info->array.raid_disks) {
855                         working++;
856                         if (map && role < map_disks)
857                                 map[role] = 1;
858                 }
859         }
860
861         info->array.working_disks = working;
862 }
863
864 static struct mdinfo *container_content1(struct supertype *st, char *subarray)
865 {
866         struct mdinfo *info;
867
868         if (subarray)
869                 return NULL;
870
871         info = xmalloc(sizeof(*info));
872         getinfo_super1(st, info, NULL);
873         return info;
874 }
875
876 static int update_super1(struct supertype *st, struct mdinfo *info,
877                          char *update,
878                          char *devname, int verbose,
879                          int uuid_set, char *homehost)
880 {
881         /* NOTE: for 'assemble' and 'force' we need to return non-zero
882          * if any change was made.  For others, the return value is
883          * ignored.
884          */
885         int rv = 0;
886         struct mdp_superblock_1 *sb = st->sb;
887
888         if (strcmp(update, "homehost") == 0 &&
889             homehost) {
890                 /* Note that 'homehost' is special as it is really
891                  * a "name" update.
892                  */
893                 char *c;
894                 update = "name";
895                 c = strchr(sb->set_name, ':');
896                 if (c)
897                         strncpy(info->name, c+1, 31 - (c-sb->set_name));
898                 else
899                         strncpy(info->name, sb->set_name, 32);
900                 info->name[32] = 0;
901         }
902
903         if (strcmp(update, "force-one")==0) {
904                 /* Not enough devices for a working array,
905                  * so bring this one up-to-date
906                  */
907                 if (sb->events != __cpu_to_le64(info->events))
908                         rv = 1;
909                 sb->events = __cpu_to_le64(info->events);
910         } else if (strcmp(update, "force-array")==0) {
911                 /* Degraded array and 'force' requests to
912                  * maybe need to mark it 'clean'.
913                  */
914                 switch(__le32_to_cpu(sb->level)) {
915                 case 5: case 4: case 6:
916                         /* need to force clean */
917                         if (sb->resync_offset != MaxSector)
918                                 rv = 1;
919                         sb->resync_offset = MaxSector;
920                 }
921         } else if (strcmp(update, "assemble")==0) {
922                 int d = info->disk.number;
923                 int want;
924                 if (info->disk.state & (1<<MD_DISK_ACTIVE))
925                         want = info->disk.raid_disk;
926                 else
927                         want = 0xFFFF;
928                 if (sb->dev_roles[d] != __cpu_to_le16(want)) {
929                         sb->dev_roles[d] = __cpu_to_le16(want);
930                         rv = 1;
931                 }
932                 if (info->reshape_active &&
933                     sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
934                     info->delta_disks >= 0 &&
935                     info->reshape_progress < __le64_to_cpu(sb->reshape_position)) {
936                         sb->reshape_position = __cpu_to_le64(info->reshape_progress);
937                         rv = 1;
938                 }
939                 if (info->reshape_active &&
940                     sb->feature_map & __le32_to_cpu(MD_FEATURE_RESHAPE_ACTIVE) &&
941                     info->delta_disks < 0 &&
942                     info->reshape_progress > __le64_to_cpu(sb->reshape_position)) {
943                         sb->reshape_position = __cpu_to_le64(info->reshape_progress);
944                         rv = 1;
945                 }
946         } else if (strcmp(update, "linear-grow-new") == 0) {
947                 unsigned int i;
948                 int rfd, fd;
949                 unsigned int max = __le32_to_cpu(sb->max_dev);
950
951                 for (i=0 ; i < max ; i++)
952                         if (__le16_to_cpu(sb->dev_roles[i]) >= 0xfffe)
953                                 break;
954                 sb->dev_number = __cpu_to_le32(i);
955                 info->disk.number = i;
956                 if (max >= __le32_to_cpu(sb->max_dev))
957                         sb->max_dev = __cpu_to_le32(max+1);
958
959                 if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
960                     read(rfd, sb->device_uuid, 16) != 16) {
961                         __u32 r[4] = {random(), random(), random(), random()};
962                         memcpy(sb->device_uuid, r, 16);
963                 }
964                 if (rfd >= 0)
965                         close(rfd);
966
967                 sb->dev_roles[i] =
968                         __cpu_to_le16(info->disk.raid_disk);
969
970                 fd = open(devname, O_RDONLY);
971                 if (fd >= 0) {
972                         unsigned long long ds;
973                         get_dev_size(fd, devname, &ds);
974                         close(fd);
975                         ds >>= 9;
976                         if (__le64_to_cpu(sb->super_offset) <
977                             __le64_to_cpu(sb->data_offset)) {
978                                 sb->data_size = __cpu_to_le64(
979                                         ds - __le64_to_cpu(sb->data_offset));
980                         } else {
981                                 ds -= 8*2;
982                                 ds &= ~(unsigned long long)(4*2-1);
983                                 sb->super_offset = __cpu_to_le64(ds);
984                                 sb->data_size = __cpu_to_le64(
985                                         ds - __le64_to_cpu(sb->data_offset));
986                         }
987                 }
988         } else if (strcmp(update, "linear-grow-update") == 0) {
989                 sb->raid_disks = __cpu_to_le32(info->array.raid_disks);
990                 sb->dev_roles[info->disk.number] =
991                         __cpu_to_le16(info->disk.raid_disk);
992         } else if (strcmp(update, "resync") == 0) {
993                 /* make sure resync happens */
994                 sb->resync_offset = 0ULL;
995         } else if (strcmp(update, "uuid") == 0) {
996                 copy_uuid(sb->set_uuid, info->uuid, super1.swapuuid);
997
998                 if (__le32_to_cpu(sb->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
999                         struct bitmap_super_s *bm;
1000                         bm = (struct bitmap_super_s*)(st->sb+MAX_SB_SIZE);
1001                         memcpy(bm->uuid, sb->set_uuid, 16);
1002                 }
1003         } else if (strcmp(update, "no-bitmap") == 0) {
1004                 sb->feature_map &= ~__cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
1005         } else if (strcmp(update, "bbl") == 0) {
1006                 /* only possible if there is room after the bitmap, or if
1007                  * there is no bitmap
1008                  */
1009                 unsigned long long sb_offset = __le64_to_cpu(sb->super_offset);
1010                 unsigned long long data_offset = __le64_to_cpu(sb->data_offset);
1011                 long bitmap_offset = (long)__le64_to_cpu(sb->bitmap_offset);
1012                 long bm_sectors = 0;
1013                 long space;
1014
1015                 if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
1016                         struct bitmap_super_s *bsb;
1017                         bsb = (struct bitmap_super_s *)(((char*)sb)+MAX_SB_SIZE);
1018                         bm_sectors = bitmap_sectors(bsb);
1019                 }
1020
1021                 if (sb_offset < data_offset) {
1022                         /* 1.1 or 1.2.  Put bbl just before data
1023                          */
1024                         long bb_offset;
1025                         space = data_offset - sb_offset;
1026                         bb_offset = space - 8;
1027                         if (bm_sectors && bitmap_offset > 0)
1028                                 space -= (bitmap_offset + bm_sectors);
1029                         else
1030                                 space -= 8; /* The superblock */
1031                         if (space >= 8) {
1032                                 sb->bblog_size = __cpu_to_le16(8);
1033                                 sb->bblog_offset = __cpu_to_le32(bb_offset);
1034                         }
1035                 } else {
1036                         /* 1.0 - Put bbl just before super block */
1037                         if (bm_sectors && bitmap_offset < 0)
1038                                 space = -bitmap_offset - bm_sectors;
1039                         else
1040                                 space = sb_offset - data_offset -
1041                                         __le64_to_cpu(sb->data_size);
1042                         if (space >= 8) {
1043                                 sb->bblog_size = __cpu_to_le16(8);
1044                                 sb->bblog_offset = __cpu_to_le32((unsigned)-8);
1045                         }
1046                 }
1047         } else if (strcmp(update, "no-bbl") == 0) {
1048                 if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BAD_BLOCKS))
1049                         pr_err("Cannot remove active bbl from %s\n",devname);
1050                 else {
1051                         sb->bblog_size = 0;
1052                         sb->bblog_shift = 0;
1053                         sb->bblog_offset = 0;
1054                 }
1055         } else if (strcmp(update, "name") == 0) {
1056                 if (info->name[0] == 0)
1057                         sprintf(info->name, "%d", info->array.md_minor);
1058                 memset(sb->set_name, 0, sizeof(sb->set_name));
1059                 if (homehost &&
1060                     strchr(info->name, ':') == NULL &&
1061                     strlen(homehost)+1+strlen(info->name) < 32) {
1062                         strcpy(sb->set_name, homehost);
1063                         strcat(sb->set_name, ":");
1064                         strcat(sb->set_name, info->name);
1065                 } else
1066                         strcpy(sb->set_name, info->name);
1067         } else if (strcmp(update, "devicesize") == 0 &&
1068             __le64_to_cpu(sb->super_offset) <
1069             __le64_to_cpu(sb->data_offset)) {
1070                 /* set data_size to device size less data_offset */
1071                 struct misc_dev_info *misc = (struct misc_dev_info*)
1072                         (st->sb + MAX_SB_SIZE + BM_SUPER_SIZE);
1073                 printf("Size was %llu\n", (unsigned long long)
1074                        __le64_to_cpu(sb->data_size));
1075                 sb->data_size = __cpu_to_le64(
1076                         misc->device_size - __le64_to_cpu(sb->data_offset));
1077                 printf("Size is %llu\n", (unsigned long long)
1078                        __le64_to_cpu(sb->data_size));
1079         } else if (strcmp(update, "_reshape_progress")==0)
1080                 sb->reshape_position = __cpu_to_le64(info->reshape_progress);
1081         else if (strcmp(update, "writemostly")==0)
1082                 sb->devflags |= WriteMostly1;
1083         else if (strcmp(update, "readwrite")==0)
1084                 sb->devflags &= ~WriteMostly1;
1085         else
1086                 rv = -1;
1087
1088         sb->sb_csum = calc_sb_1_csum(sb);
1089         return rv;
1090 }
1091
1092 static int init_super1(struct supertype *st, mdu_array_info_t *info,
1093                        unsigned long long size, char *name, char *homehost,
1094                        int *uuid, unsigned long long data_offset)
1095 {
1096         struct mdp_superblock_1 *sb;
1097         int spares;
1098         int rfd;
1099         char defname[10];
1100         int sbsize;
1101
1102         if (posix_memalign((void**)&sb, 4096, SUPER1_SIZE) != 0) {
1103                 pr_err("%s could not allocate superblock\n", __func__);
1104                 return 0;
1105         }
1106         memset(sb, 0, SUPER1_SIZE);
1107
1108         st->sb = sb;
1109         if (info == NULL) {
1110                 /* zeroing superblock */
1111                 return 0;
1112         }
1113
1114         spares = info->working_disks - info->active_disks;
1115         if (info->raid_disks + spares  > MAX_DEVS) {
1116                 pr_err("too many devices requested: %d+%d > %d\n",
1117                         info->raid_disks , spares, MAX_DEVS);
1118                 return 0;
1119         }
1120
1121         sb->magic = __cpu_to_le32(MD_SB_MAGIC);
1122         sb->major_version = __cpu_to_le32(1);
1123         sb->feature_map = 0;
1124         sb->pad0 = 0;
1125
1126         if (uuid)
1127                 copy_uuid(sb->set_uuid, uuid, super1.swapuuid);
1128         else {
1129                 if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
1130                     read(rfd, sb->set_uuid, 16) != 16) {
1131                         __u32 r[4] = {random(), random(), random(), random()};
1132                         memcpy(sb->set_uuid, r, 16);
1133                 }
1134                 if (rfd >= 0) close(rfd);
1135         }
1136
1137         if (name == NULL || *name == 0) {
1138                 sprintf(defname, "%d", info->md_minor);
1139                 name = defname;
1140         }
1141         if (homehost &&
1142             strchr(name, ':')== NULL &&
1143             strlen(homehost)+1+strlen(name) < 32) {
1144                 strcpy(sb->set_name, homehost);
1145                 strcat(sb->set_name, ":");
1146                 strcat(sb->set_name, name);
1147         } else
1148                 strcpy(sb->set_name, name);
1149
1150         sb->ctime = __cpu_to_le64((unsigned long long)time(0));
1151         sb->level = __cpu_to_le32(info->level);
1152         sb->layout = __cpu_to_le32(info->layout);
1153         sb->size = __cpu_to_le64(size*2ULL);
1154         sb->chunksize = __cpu_to_le32(info->chunk_size>>9);
1155         sb->raid_disks = __cpu_to_le32(info->raid_disks);
1156
1157         sb->data_offset = __cpu_to_le64(data_offset);
1158         sb->data_size = __cpu_to_le64(0);
1159         sb->super_offset = __cpu_to_le64(0);
1160         sb->recovery_offset = __cpu_to_le64(0);
1161
1162         sb->utime = sb->ctime;
1163         sb->events = __cpu_to_le64(1);
1164         if (info->state & (1<<MD_SB_CLEAN))
1165                 sb->resync_offset = MaxSector;
1166         else
1167                 sb->resync_offset = 0;
1168         sbsize = sizeof(struct mdp_superblock_1) + 2 * (info->raid_disks + spares);
1169         sbsize = ROUND_UP(sbsize, 512);
1170         sb->max_dev = __cpu_to_le32((sbsize - sizeof(struct mdp_superblock_1)) / 2);
1171
1172         memset(sb->dev_roles, 0xff, MAX_SB_SIZE - sizeof(struct mdp_superblock_1));
1173
1174         return 1;
1175 }
1176
/*
 * One node per device queued by add_to_super1().  The nodes form a
 * singly linked list hung off st->info, which write_init_super1()
 * later walks.
 */
struct devinfo {
	int fd;			/* descriptor for the device, as given to add_to_super1() */
	char *devname;		/* device name pointer as given; not copied */
	long long data_offset;	/* requested data offset for this device */
	mdu_disk_info_t disk;	/* copy of the caller's disk description */
	struct devinfo *next;	/* next queued device, NULL at the tail */
};
1184 #ifndef MDASSEMBLE
1185 /* Add a device to the superblock being created */
1186 static int add_to_super1(struct supertype *st, mdu_disk_info_t *dk,
1187                          int fd, char *devname, unsigned long long data_offset)
1188 {
1189         struct mdp_superblock_1 *sb = st->sb;
1190         __u16 *rp = sb->dev_roles + dk->number;
1191         struct devinfo *di, **dip;
1192
1193         if ((dk->state & 6) == 6) /* active, sync */
1194                 *rp = __cpu_to_le16(dk->raid_disk);
1195         else if ((dk->state & ~2) == 0) /* active or idle -> spare */
1196                 *rp = 0xffff;
1197         else
1198                 *rp = 0xfffe;
1199
1200         if (dk->number >= (int)__le32_to_cpu(sb->max_dev) &&
1201             __le32_to_cpu(sb->max_dev) < MAX_DEVS)
1202                 sb->max_dev = __cpu_to_le32(dk->number+1);
1203
1204         sb->dev_number = __cpu_to_le32(dk->number);
1205         sb->devflags = 0; /* don't copy another disks flags */
1206         sb->sb_csum = calc_sb_1_csum(sb);
1207
1208         dip = (struct devinfo **)&st->info;
1209         while (*dip)
1210                 dip = &(*dip)->next;
1211         di = xmalloc(sizeof(struct devinfo));
1212         di->fd = fd;
1213         di->devname = devname;
1214         di->disk = *dk;
1215         di->data_offset = data_offset;
1216         di->next = NULL;
1217         *dip = di;
1218
1219         return 0;
1220 }
1221 #endif
1222
1223 static void locate_bitmap1(struct supertype *st, int fd);
1224
1225 static int store_super1(struct supertype *st, int fd)
1226 {
1227         struct mdp_superblock_1 *sb = st->sb;
1228         unsigned long long sb_offset;
1229         struct align_fd afd;
1230         int sbsize;
1231         unsigned long long dsize;
1232
1233         if (!get_dev_size(fd, NULL, &dsize))
1234                 return 1;
1235
1236         dsize >>= 9;
1237
1238         if (dsize < 24)
1239                 return 2;
1240
1241         init_afd(&afd, fd);
1242
1243         /*
1244          * Calculate the position of the superblock.
1245          * It is always aligned to a 4K boundary and
1246          * depending on minor_version, it can be:
1247          * 0: At least 8K, but less than 12K, from end of device
1248          * 1: At start of device
1249          * 2: 4K from start of device.
1250          */
1251         switch(st->minor_version) {
1252         case 0:
1253                 sb_offset = dsize;
1254                 sb_offset -= 8*2;
1255                 sb_offset &= ~(4*2-1);
1256                 break;
1257         case 1:
1258                 sb_offset = 0;
1259                 break;
1260         case 2:
1261                 sb_offset = 4*2;
1262                 break;
1263         default:
1264                 return -EINVAL;
1265         }
1266
1267
1268
1269         if (sb_offset != __le64_to_cpu(sb->super_offset) &&
1270             0 != __le64_to_cpu(sb->super_offset)
1271                 ) {
1272                 pr_err("internal error - sb_offset is wrong\n");
1273                 abort();
1274         }
1275
1276         if (lseek64(fd, sb_offset << 9, 0)< 0LL)
1277                 return 3;
1278
1279         sbsize = ROUND_UP(sizeof(*sb) + 2 * __le32_to_cpu(sb->max_dev), 512);
1280
1281         if (awrite(&afd, sb, sbsize) != sbsize)
1282                 return 4;
1283
1284         if (sb->feature_map & __cpu_to_le32(MD_FEATURE_BITMAP_OFFSET)) {
1285                 struct bitmap_super_s *bm = (struct bitmap_super_s*)
1286                         (((char*)sb)+MAX_SB_SIZE);
1287                 if (__le32_to_cpu(bm->magic) == BITMAP_MAGIC) {
1288                         locate_bitmap1(st, fd);
1289                         if (awrite(&afd, bm, sizeof(*bm)) != sizeof(*bm))
1290                                 return 5;
1291                 }
1292         }
1293         fsync(fd);
1294         return 0;
1295 }
1296
1297 static int load_super1(struct supertype *st, int fd, char *devname);
1298
/*
 * Decide how many sectors to set aside for a bitmap on a device of
 * 'devsize' sectors:
 *   - under 64K:                 nothing
 *   - 200Gig or more beyond 64K: 128K
 *   - more than 8Gig beyond 4K:  64K
 *   - otherwise:                 4K
 * NOTE: every result is a multiple of 4K, which 4K-sector devices
 * require.
 */
static unsigned long choose_bm_space(unsigned long devsize)
{
	unsigned long reserve;

	if (devsize < 64*2)
		reserve = 0;
	else if (devsize - 64*2 >= 200*1024*1024*2)
		reserve = 128*2;
	else if (devsize - 4*2 > 8*1024*1024*2)
		reserve = 64*2;
	else
		reserve = 4*2;

	return reserve;
}
1313
1314 static void free_super1(struct supertype *st);
1315
1316 #ifndef MDASSEMBLE
1317 static int write_init_super1(struct supertype *st)
1318 {
1319         struct mdp_superblock_1 *sb = st->sb;
1320         struct supertype *refst;
1321         int rfd;
1322         int rv = 0;
1323         unsigned long long bm_space;
1324         unsigned long long reserved;
1325         struct devinfo *di;
1326         unsigned long long dsize, array_size;
1327         unsigned long long sb_offset, headroom;
1328         unsigned long long data_offset;
1329
1330         for (di = st->info; di; di = di->next) {
1331                 if (di->disk.state == 1)
1332                         continue;
1333                 if (di->fd < 0)
1334                         continue;
1335
1336                 while (Kill(di->devname, NULL, 0, -1, 1) == 0)
1337                         ;
1338
1339                 sb->dev_number = __cpu_to_le32(di->disk.number);
1340                 if (di->disk.state & (1<<MD_DISK_WRITEMOSTLY))
1341                         sb->devflags |= WriteMostly1;
1342                 else
1343                         sb->devflags &= ~WriteMostly1;
1344
1345                 if ((rfd = open("/dev/urandom", O_RDONLY)) < 0 ||
1346                     read(rfd, sb->device_uuid, 16) != 16) {
1347                         __u32 r[4] = {random(), random(), random(), random()};
1348                         memcpy(sb->device_uuid, r, 16);
1349                 }
1350                 if (rfd >= 0)
1351                         close(rfd);
1352
1353                 sb->events = 0;
1354
1355                 refst = dup_super(st);
1356                 if (load_super1(refst, di->fd, NULL)==0) {
1357                         struct mdp_superblock_1 *refsb = refst->sb;
1358
1359                         memcpy(sb->device_uuid, refsb->device_uuid, 16);
1360                         if (memcmp(sb->set_uuid, refsb->set_uuid, 16)==0) {
1361                                 /* same array, so preserve events and
1362                                  * dev_number */
1363                                 sb->events = refsb->events;
1364                                 /* bugs in 2.6.17 and earlier mean the
1365                                  * dev_number chosen in Manage must be preserved
1366                                  */
1367                                 if (get_linux_version() >= 2006018)
1368                                         sb->dev_number = refsb->dev_number;
1369                         }
1370                         free_super1(refst);
1371                 }
1372                 free(refst);
1373
1374                 if (!get_dev_size(di->fd, NULL, &dsize)) {
1375                         rv = 1;
1376                         goto error_out;
1377                 }
1378                 dsize >>= 9;
1379
1380                 if (dsize < 24) {
1381                         close(di->fd);
1382                         rv = 2;
1383                         goto error_out;
1384                 }
1385
1386
1387                 /*
1388                  * Calculate the position of the superblock.
1389                  * It is always aligned to a 4K boundary and
1390                  * depending on minor_version, it can be:
1391                  * 0: At least 8K, but less than 12K, from end of device
1392                  * 1: At start of device
1393                  * 2: 4K from start of device.
1394                  * Depending on the array size, we might leave extra space
1395                  * for a bitmap.
1396                  * Also leave 4K for bad-block log.
1397                  */
1398                 array_size = __le64_to_cpu(sb->size);
1399                 /* work out how much space we left for a bitmap,
1400                  * Add 8 sectors for bad block log */
1401                 bm_space = choose_bm_space(array_size) + 8;
1402
1403                 /* We try to leave 0.1% at the start for reshape
1404                  * operations, but limit this to 128Meg (0.1% of 10Gig)
1405                  * which is plenty for efficient reshapes
1406                  * However we make it at least 2 chunks as one chunk
1407                  * is minimum needed for reshape.
1408                  */
1409                 headroom = 128 * 1024 * 2;
1410                 while  (headroom << 10 > array_size &&
1411                         headroom/2 >= __le32_to_cpu(sb->chunksize) * 2)
1412                         headroom >>= 1;
1413
1414                 data_offset = di->data_offset;
1415                 switch(st->minor_version) {
1416                 case 0:
1417                         sb_offset = dsize;
1418                         sb_offset -= 8*2;
1419                         sb_offset &= ~(4*2-1);
1420                         sb->super_offset = __cpu_to_le64(sb_offset);
1421                         if (data_offset == INVALID_SECTORS)
1422                                 sb->data_offset = 0;
1423                         if (sb_offset < array_size + bm_space)
1424                                 bm_space = sb_offset - array_size;
1425                         sb->data_size = __cpu_to_le64(sb_offset - bm_space);
1426                         if (bm_space >= 8) {
1427                                 sb->bblog_size = __cpu_to_le16(8);
1428                                 sb->bblog_offset = __cpu_to_le32((unsigned)-8);
1429                         }
1430                         break;
1431                 case 1:
1432                         sb->super_offset = __cpu_to_le64(0);
1433                         if (data_offset == INVALID_SECTORS) {
1434                                 reserved = bm_space + 4*2;
1435                                 if (reserved < headroom)
1436                                         reserved = headroom;
1437                                 if (reserved + array_size > dsize)
1438                                         reserved = dsize - array_size;
1439                                 /* Try for multiple of 1Meg so it is nicely aligned */
1440                                 #define ONE_MEG (2*1024)
1441                                 if (reserved > ONE_MEG)
1442                                         reserved = (reserved/ONE_MEG) * ONE_MEG;
1443
1444                                 /* force 4K alignment */
1445                                 reserved &= ~7ULL;
1446
1447                         } else
1448                                 reserved = data_offset;
1449
1450                         sb->data_offset = __cpu_to_le64(reserved);
1451                         sb->data_size = __cpu_to_le64(dsize - reserved);
1452                         if (reserved >= 16) {
1453                                 sb->bblog_size = __cpu_to_le16(8);
1454                                 sb->bblog_offset = __cpu_to_le32(reserved-8);
1455                         }
1456                         break;
1457                 case 2:
1458                         sb_offset = 4*2;
1459                         sb->super_offset = __cpu_to_le64(4*2);
1460                         if (data_offset == INVALID_SECTORS) {
1461                                 if (4*2 + 4*2 + bm_space + array_size
1462                                     > dsize)
1463                                         bm_space = dsize - array_size
1464                                                 - 4*2 - 4*2;
1465
1466                                 reserved = bm_space + 4*2 + 4*2;
1467                                 if (reserved < headroom)
1468                                         reserved = headroom;
1469                                 if (reserved + array_size > dsize)
1470                                         reserved = dsize - array_size;
1471                                 /* Try for multiple of 1Meg so it is nicely aligned */
1472                                 #define ONE_MEG (2*1024)
1473                                 if (reserved > ONE_MEG)
1474                                         reserved = (reserved/ONE_MEG) * ONE_MEG;
1475
1476                                 /* force 4K alignment */
1477                                 reserved &= ~7ULL;
1478
1479                         } else
1480                                 reserved = data_offset;
1481
1482                         sb->data_offset = __cpu_to_le64(reserved);
1483                         sb->data_size = __cpu_to_le64(dsize - reserved);
1484                         if (reserved >= 16+16) {
1485                                 sb->bblog_size = __cpu_to_le16(8);
1486                                 /* '8' sectors for the bblog, and another '8'
1487                                  * because we want offset from superblock, not
1488                                  * start of device.
1489                                  */
1490                                 sb->bblog_offset = __cpu_to_le32(reserved-8-8);
1491                         }
1492                         break;
1493                 default:
1494                         pr_err("Failed to write invalid "
1495                                "metadata format 1.%i to %s\n",
1496                                st->minor_version, di->devname);
1497                         rv = -EINVAL;
1498                         goto out;
1499                 }
1500
1501                 sb->sb_csum = calc_sb_1_csum(sb);
1502                 rv = store_super1(st, di->fd);
1503                 if (rv == 0 && (__le32_to_cpu(sb->feature_map) & 1))
1504                         rv = st->ss->write_bitmap(st, di->fd);
1505                 close(di->fd);
1506                 di->fd = -1;
1507                 if (rv)
1508                         goto error_out;
1509         }
1510 error_out:
1511         if (rv)
1512                 pr_err("Failed to write metadata to %s\n",
1513                        di->devname);
1514 out:
1515         return rv;
1516 }
1517 #endif
1518
1519 static int compare_super1(struct supertype *st, struct supertype *tst)
1520 {
1521         /*
1522          * return:
1523          *  0 same, or first was empty, and second was copied
1524          *  1 second had wrong number
1525          *  2 wrong uuid
1526          *  3 wrong other info
1527          */
1528         struct mdp_superblock_1 *first = st->sb;
1529         struct mdp_superblock_1 *second = tst->sb;
1530
1531         if (second->magic != __cpu_to_le32(MD_SB_MAGIC))
1532                 return 1;
1533         if (second->major_version != __cpu_to_le32(1))
1534                 return 1;
1535
1536         if (!first) {
1537                 if (posix_memalign((void**)&first, 4096, SUPER1_SIZE) != 0) {
1538                         pr_err("%s could not allocate superblock\n", __func__);
1539                         return 1;
1540                 }
1541                 memcpy(first, second, SUPER1_SIZE);
1542                 st->sb = first;
1543                 return 0;
1544         }
1545         if (memcmp(first->set_uuid, second->set_uuid, 16)!= 0)
1546                 return 2;
1547
1548         if (first->ctime      != second->ctime     ||
1549             first->level      != second->level     ||
1550             first->layout     != second->layout    ||
1551             first->size       != second->size      ||
1552             first->chunksize  != second->chunksize ||
1553             first->raid_disks != second->raid_disks)
1554                 return 3;
1555         return 0;
1556 }
1557
1558 static int load_super1(struct supertype *st, int fd, char *devname)
1559 {
1560         unsigned long long dsize;
1561         unsigned long long sb_offset;
1562         struct mdp_superblock_1 *super;
1563         int uuid[4];
1564         struct bitmap_super_s *bsb;
1565         struct misc_dev_info *misc;
1566         struct align_fd afd;
1567
1568         free_super1(st);
1569
1570         init_afd(&afd, fd);
1571
1572         if (st->ss == NULL || st->minor_version == -1) {
1573                 int bestvers = -1;
1574                 struct supertype tst;
1575                 __u64 bestctime = 0;
1576                 /* guess... choose latest ctime */
1577                 memset(&tst, 0, sizeof(tst));
1578                 tst.ss = &super1;
1579                 for (tst.minor_version = 0; tst.minor_version <= 2 ; tst.minor_version++) {
1580                         switch(load_super1(&tst, fd, devname)) {
1581                         case 0: super = tst.sb;
1582                                 if (bestvers == -1 ||
1583                                     bestctime < __le64_to_cpu(super->ctime)) {
1584                                         bestvers = tst.minor_version;
1585                                         bestctime = __le64_to_cpu(super->ctime);
1586                                 }
1587                                 free(super);
1588                                 tst.sb = NULL;
1589                                 break;
1590                         case 1: return 1; /*bad device */
1591                         case 2: break; /* bad, try next */
1592                         }
1593                 }
1594                 if (bestvers != -1) {
1595                         int rv;
1596                         tst.minor_version = bestvers;
1597                         tst.ss = &super1;
1598                         tst.max_devs = MAX_DEVS;
1599                         rv = load_super1(&tst, fd, devname);
1600                         if (rv == 0)
1601                                 *st = tst;
1602                         return rv;
1603                 }
1604                 return 2;
1605         }
1606         if (!get_dev_size(fd, devname, &dsize))
1607                 return 1;
1608         dsize >>= 9;
1609
1610         if (dsize < 24) {
1611                 if (devname)
1612                         pr_err("%s is too small for md: size is %llu sectors.\n",
1613                                 devname, dsize);
1614                 return 1;
1615         }
1616
1617         /*
1618          * Calculate the position of the superblock.
1619          * It is always aligned to a 4K boundary and
1620          * depending on minor_version, it can be:
1621          * 0: At least 8K, but less than 12K, from end of device
1622          * 1: At start of device
1623          * 2: 4K from start of device.
1624          */
1625         switch(st->minor_version) {
1626         case 0:
1627                 sb_offset = dsize;
1628                 sb_offset -= 8*2;
1629                 sb_offset &= ~(4*2-1);
1630                 break;
1631         case 1:
1632                 sb_offset = 0;
1633                 break;
1634         case 2:
1635                 sb_offset = 4*2;
1636                 break;
1637         default:
1638                 return -EINVAL;
1639         }
1640
1641         if (lseek64(fd, sb_offset << 9, 0)< 0LL) {
1642                 if (devname)
1643                         pr_err("Cannot seek to superblock on %s: %s\n",
1644                                 devname, strerror(errno));
1645                 return 1;
1646         }
1647
1648         if (posix_memalign((void**)&super, 4096, SUPER1_SIZE) != 0) {
1649                 pr_err("%s could not allocate superblock\n",
1650                         __func__);
1651                 return 1;
1652         }
1653
1654         if (aread(&afd, super, MAX_SB_SIZE) != MAX_SB_SIZE) {
1655                 if (devname)
1656                         pr_err("Cannot read superblock on %s\n",
1657                                 devname);
1658                 free(super);
1659                 return 1;
1660         }
1661
1662         if (__le32_to_cpu(super->magic) != MD_SB_MAGIC) {
1663                 if (devname)
1664                         pr_err("No super block found on %s (Expected magic %08x, got %08x)\n",
1665                                 devname, MD_SB_MAGIC, __le32_to_cpu(super->magic));
1666                 free(super);
1667                 return 2;
1668         }
1669
1670         if (__le32_to_cpu(super->major_version) != 1) {
1671                 if (devname)
1672                         pr_err("Cannot interpret superblock on %s - version is %d\n",
1673                                 devname, __le32_to_cpu(super->major_version));
1674                 free(super);
1675                 return 2;
1676         }
1677         if (__le64_to_cpu(super->super_offset) != sb_offset) {
1678                 if (devname)
1679                         pr_err("No superblock found on %s (super_offset is wrong)\n",
1680                                 devname);
1681                 free(super);
1682                 return 2;
1683         }
1684         st->sb = super;
1685
1686         bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
1687
1688         misc = (struct misc_dev_info*) (((char*)super)+MAX_SB_SIZE+BM_SUPER_SIZE);
1689         misc->device_size = dsize;
1690
1691         /* Now check on the bitmap superblock */
1692         if ((__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) == 0)
1693                 return 0;
1694         /* Read the bitmap superblock and make sure it looks
1695          * valid.  If it doesn't clear the bit.  An --assemble --force
1696          * should get that written out.
1697          */
1698         locate_bitmap1(st, fd);
1699         if (aread(&afd, bsb, 512) != 512)
1700                 goto no_bitmap;
1701
1702         uuid_from_super1(st, uuid);
1703         if (__le32_to_cpu(bsb->magic) != BITMAP_MAGIC ||
1704             memcmp(bsb->uuid, uuid, 16) != 0)
1705                 goto no_bitmap;
1706         return 0;
1707
1708  no_bitmap:
1709         super->feature_map = __cpu_to_le32(__le32_to_cpu(super->feature_map)
1710                                            & ~MD_FEATURE_BITMAP_OFFSET);
1711         return 0;
1712 }
1713
1714
1715 static struct supertype *match_metadata_desc1(char *arg)
1716 {
1717         struct supertype *st = xcalloc(1, sizeof(*st));
1718
1719         st->container_devnm[0] = 0;
1720         st->ss = &super1;
1721         st->max_devs = MAX_DEVS;
1722         st->sb = NULL;
1723         /* leading zeros can be safely ignored.  --detail generates them. */
1724         while (*arg == '0')
1725                 arg++;
1726         if (strcmp(arg, "1.0") == 0 ||
1727             strcmp(arg, "1.00") == 0) {
1728                 st->minor_version = 0;
1729                 return st;
1730         }
1731         if (strcmp(arg, "1.1") == 0 ||
1732             strcmp(arg, "1.01") == 0
1733                 ) {
1734                 st->minor_version = 1;
1735                 return st;
1736         }
1737         if (strcmp(arg, "1.2") == 0 ||
1738 #ifndef DEFAULT_OLD_METADATA /* ifdef in super0.c */
1739             strcmp(arg, "default") == 0 ||
1740 #endif /* DEFAULT_OLD_METADATA */
1741             strcmp(arg, "1.02") == 0) {
1742                 st->minor_version = 2;
1743                 return st;
1744         }
1745         if (strcmp(arg, "1") == 0 ||
1746             strcmp(arg, "default") == 0) {
1747                 st->minor_version = -1;
1748                 return st;
1749         }
1750
1751         free(st);
1752         return NULL;
1753 }
1754
1755 /* find available size on device with this devsize, using
1756  * superblock type st, and reserving 'reserve' sectors for
1757  * a possible bitmap
1758  */
1759 static __u64 _avail_size1(struct supertype *st, __u64 devsize,
1760                           unsigned long long data_offset, int chunksize)
1761 {
1762         struct mdp_superblock_1 *super = st->sb;
1763         int bmspace = 0;
1764         if (devsize < 24)
1765                 return 0;
1766
1767         if (super == NULL)
1768                 /* creating:  allow suitable space for bitmap */
1769                 bmspace = choose_bm_space(devsize);
1770 #ifndef MDASSEMBLE
1771         else if (__le32_to_cpu(super->feature_map)&MD_FEATURE_BITMAP_OFFSET) {
1772                 /* hot-add. allow for actual size of bitmap */
1773                 struct bitmap_super_s *bsb;
1774                 bsb = (struct bitmap_super_s *)(((char*)super)+MAX_SB_SIZE);
1775                 bmspace = bitmap_sectors(bsb);
1776         }
1777 #endif
1778         /* Allow space for bad block log */
1779         if (super && super->bblog_size)
1780                 devsize -= __le16_to_cpu(super->bblog_size);
1781         else
1782                 devsize -= 8;
1783
1784
1785         if (st->minor_version < 0)
1786                 /* not specified, so time to set default */
1787                 st->minor_version = 2;
1788
1789         if (data_offset != INVALID_SECTORS)
1790                 switch(st->minor_version) {
1791                 case 0:
1792                         return devsize - data_offset - 8*2;
1793                 case 1:
1794                 case 2:
1795                         return devsize - data_offset;
1796                 default:
1797                         return 0;
1798                 }
1799
1800         devsize -= bmspace;
1801
1802         if (super == NULL && st->minor_version > 0) {
1803                 /* haven't committed to a size yet, so allow some
1804                  * slack for space for reshape.
1805                  * Limit slack to 128M, but aim for about 0.1%
1806                  */
1807                 unsigned long long headroom = 128*1024*2;
1808                 while ((headroom << 10) > devsize &&
1809                        (chunksize == 0 ||
1810                         headroom / 2 >= ((unsigned)chunksize*2)*2))
1811                         headroom >>= 1;
1812                 devsize -= headroom;
1813         }
1814         switch(st->minor_version) {
1815         case 0:
1816                 /* at end */
1817                 return ((devsize - 8*2 ) & ~(4*2-1));
1818         case 1:
1819                 /* at start, 4K for superblock and possible bitmap */
1820                 return devsize - 4*2;
1821         case 2:
1822                 /* 4k from start, 4K for superblock and possible bitmap */
1823                 return devsize - (4+4)*2;
1824         }
1825         return 0;
1826 }
1827 static __u64 avail_size1(struct supertype *st, __u64 devsize,
1828                          unsigned long long data_offset)
1829 {
1830         return _avail_size1(st, devsize, data_offset, 0);
1831 }
1832
1833 static int
1834 add_internal_bitmap1(struct supertype *st,
1835                      int *chunkp, int delay, int write_behind,
1836                      unsigned long long size,
1837                      int may_change, int major)
1838 {
1839         /*
1840          * If not may_change, then this is a 'Grow' without sysfs support for
1841          * bitmaps, and the bitmap must fit after the superblock at 1K offset.
1842          * If may_change, then this is create or a Grow with sysfs syupport,
1843          * and we can put the bitmap wherever we like.
1844          *
1845          * size is in sectors,  chunk is in bytes !!!
1846          */
1847
1848         unsigned long long bits;
1849         unsigned long long max_bits;
1850         unsigned long long min_chunk;
1851         long offset;
1852         long bbl_offset, bbl_size;
1853         unsigned long long chunk = *chunkp;
1854         int room = 0;
1855         int creating = 0;
1856         struct mdp_superblock_1 *sb = st->sb;
1857         bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb) + MAX_SB_SIZE);
1858         int uuid[4];
1859
1860
1861         if (__le64_to_cpu(sb->data_size) == 0)
1862                 /* Must be creating the array, else data_size would be non-zero */
1863                 creating = 1;
1864         switch(st->minor_version) {
1865         case 0:
1866                 /* either 3K after the superblock (when hot-add),
1867                  * or some amount of space before.
1868                  */
1869                 if (creating) {
1870                         /* We are creating array, so we *know* how much room has
1871                          * been left.
1872                          */
1873                         offset = 0;
1874                         room = choose_bm_space(__le64_to_cpu(sb->size));
1875                         bbl_size = 8;
1876                 } else {
1877                         room = __le64_to_cpu(sb->super_offset)
1878                                 - __le64_to_cpu(sb->data_offset)
1879                                 - __le64_to_cpu(sb->data_size);
1880                         bbl_size = __le16_to_cpu(sb->bblog_size);
1881                         if (bbl_size < 8)
1882                                 bbl_size = 8;
1883                         bbl_offset = (__s32)__le32_to_cpu(sb->bblog_offset);
1884                         if (bbl_size < -bbl_offset)
1885                                 bbl_size = -bbl_offset;
1886
1887                         if (!may_change || (room < 3*2 &&
1888                                   __le32_to_cpu(sb->max_dev) <= 384)) {
1889                                 room = 3*2;
1890                                 offset = 1*2;
1891                                 bbl_size = 0;
1892                         } else {
1893                                 offset = 0; /* means movable offset */
1894                         }
1895                 }
1896                 break;
1897         case 1:
1898         case 2: /* between superblock and data */
1899                 if (creating) {
1900                         offset = 4*2;
1901                         room = choose_bm_space(__le64_to_cpu(sb->size));
1902                         bbl_size = 8;
1903                 } else {
1904                         room = __le64_to_cpu(sb->data_offset)
1905                                 - __le64_to_cpu(sb->super_offset);
1906                         bbl_size = __le16_to_cpu(sb->bblog_size);
1907                         if (bbl_size)
1908                                 room = __le32_to_cpu(sb->bblog_offset) + bbl_size;
1909                         else
1910                                 bbl_size = 8;
1911
1912                         if (!may_change) {
1913                                 room -= 2; /* Leave 1K for superblock */
1914                                 offset = 2;
1915                                 bbl_size = 0;
1916                         } else {
1917                                 room -= 4*2; /* leave 4K for superblock */
1918                                 offset = 4*2;
1919                         }
1920                 }
1921                 break;
1922         default:
1923                 return 0;
1924         }
1925
1926         room -= bbl_size;
1927         if (chunk == UnSet && room > 128*2)
1928                 /* Limit to 128K of bitmap when chunk size not requested */
1929                 room = 128*2;
1930
1931         max_bits = (room * 512 - sizeof(bitmap_super_t)) * 8;
1932
1933         min_chunk = 4096; /* sub-page chunks don't work yet.. */
1934         bits = (size*512)/min_chunk +1;
1935         while (bits > max_bits) {
1936                 min_chunk *= 2;
1937                 bits = (bits+1)/2;
1938         }
1939         if (chunk == UnSet) {
1940                 /* For practical purpose, 64Meg is a good
1941                  * default chunk size for internal bitmaps.
1942                  */
1943                 chunk = min_chunk;
1944                 if (chunk < 64*1024*1024)
1945                         chunk = 64*1024*1024;
1946         } else if (chunk < min_chunk)
1947                 return 0; /* chunk size too small */
1948         if (chunk == 0) /* rounding problem */
1949                 return 0;
1950
1951         if (offset == 0) {
1952                 /* start bitmap on a 4K boundary with enough space for
1953                  * the bitmap
1954                  */
1955                 bits = (size*512) / chunk + 1;
1956                 room = ((bits+7)/8 + sizeof(bitmap_super_t) +4095)/4096;
1957                 room *= 8; /* convert 4K blocks to sectors */
1958                 offset = -room - bbl_size;
1959         }
1960
1961         sb->bitmap_offset = (int32_t)__cpu_to_le32(offset);
1962
1963         sb->feature_map = __cpu_to_le32(__le32_to_cpu(sb->feature_map)
1964                                         | MD_FEATURE_BITMAP_OFFSET);
1965         memset(bms, 0, sizeof(*bms));
1966         bms->magic = __cpu_to_le32(BITMAP_MAGIC);
1967         bms->version = __cpu_to_le32(major);
1968         uuid_from_super1(st, uuid);
1969         memcpy(bms->uuid, uuid, 16);
1970         bms->chunksize = __cpu_to_le32(chunk);
1971         bms->daemon_sleep = __cpu_to_le32(delay);
1972         bms->sync_size = __cpu_to_le64(size);
1973         bms->write_behind = __cpu_to_le32(write_behind);
1974
1975         *chunkp = chunk;
1976         return 1;
1977 }
1978
1979 static void locate_bitmap1(struct supertype *st, int fd)
1980 {
1981         unsigned long long offset;
1982         struct mdp_superblock_1 *sb;
1983         int mustfree = 0;
1984
1985         if (!st->sb) {
1986                 if (st->ss->load_super(st, fd, NULL))
1987                         return; /* no error I hope... */
1988                 mustfree = 1;
1989         }
1990         sb = st->sb;
1991
1992         offset = __le64_to_cpu(sb->super_offset);
1993         offset += (int32_t) __le32_to_cpu(sb->bitmap_offset);
1994         if (mustfree)
1995                 free(sb);
1996         lseek64(fd, offset<<9, 0);
1997 }
1998
1999 static int write_bitmap1(struct supertype *st, int fd)
2000 {
2001         struct mdp_superblock_1 *sb = st->sb;
2002         bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
2003         int rv = 0;
2004         void *buf;
2005         int towrite, n;
2006         struct align_fd afd;
2007
2008         init_afd(&afd, fd);
2009
2010         locate_bitmap1(st, fd);
2011
2012         if (posix_memalign(&buf, 4096, 4096))
2013                 return -ENOMEM;
2014
2015         memset(buf, 0xff, 4096);
2016         memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
2017
2018         towrite = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
2019         towrite = (towrite+7) >> 3; /* bits to bytes */
2020         towrite += sizeof(bitmap_super_t);
2021         towrite = ROUND_UP(towrite, 512);
2022         while (towrite > 0) {
2023                 n = towrite;
2024                 if (n > 4096)
2025                         n = 4096;
2026                 n = awrite(&afd, buf, n);
2027                 if (n > 0)
2028                         towrite -= n;
2029                 else
2030                         break;
2031                 memset(buf, 0xff, 4096);
2032         }
2033         fsync(fd);
2034         if (towrite)
2035                 rv = -2;
2036
2037         free(buf);
2038         return rv;
2039 }
2040
2041 static void free_super1(struct supertype *st)
2042 {
2043         if (st->sb)
2044                 free(st->sb);
2045         while (st->info) {
2046                 struct devinfo *di = st->info;
2047                 st->info = di->next;
2048                 if (di->fd >= 0)
2049                         close(di->fd);
2050                 free(di);
2051         }
2052         st->sb = NULL;
2053 }
2054
2055 #ifndef MDASSEMBLE
2056 static int validate_geometry1(struct supertype *st, int level,
2057                               int layout, int raiddisks,
2058                               int *chunk, unsigned long long size,
2059                               unsigned long long data_offset,
2060                               char *subdev, unsigned long long *freesize,
2061                               int verbose)
2062 {
2063         unsigned long long ldsize;
2064         int fd;
2065
2066         if (level == LEVEL_CONTAINER) {
2067                 if (verbose)
2068                         pr_err("1.x metadata does not support containers\n");
2069                 return 0;
2070         }
2071         if (chunk && *chunk == UnSet)
2072                 *chunk = DEFAULT_CHUNK;
2073
2074         if (!subdev)
2075                 return 1;
2076
2077         fd = open(subdev, O_RDONLY|O_EXCL, 0);
2078         if (fd < 0) {
2079                 if (verbose)
2080                         pr_err("super1.x cannot open %s: %s\n",
2081                                 subdev, strerror(errno));
2082                 return 0;
2083         }
2084
2085         if (!get_dev_size(fd, subdev, &ldsize)) {
2086                 close(fd);
2087                 return 0;
2088         }
2089         close(fd);
2090
2091         *freesize = _avail_size1(st, ldsize >> 9, data_offset, *chunk);
2092         return 1;
2093 }
2094 #endif /* MDASSEMBLE */
2095
2096 struct superswitch super1 = {
2097 #ifndef MDASSEMBLE
2098         .examine_super = examine_super1,
2099         .brief_examine_super = brief_examine_super1,
2100         .export_examine_super = export_examine_super1,
2101         .detail_super = detail_super1,
2102         .brief_detail_super = brief_detail_super1,
2103         .export_detail_super = export_detail_super1,
2104         .write_init_super = write_init_super1,
2105         .validate_geometry = validate_geometry1,
2106         .add_to_super = add_to_super1,
2107         .examine_badblocks = examine_badblocks_super1,
2108 #endif
2109         .match_home = match_home1,
2110         .uuid_from_super = uuid_from_super1,
2111         .getinfo_super = getinfo_super1,
2112         .container_content = container_content1,
2113         .update_super = update_super1,
2114         .init_super = init_super1,
2115         .store_super = store_super1,
2116         .compare_super = compare_super1,
2117         .load_super = load_super1,
2118         .match_metadata_desc = match_metadata_desc1,
2119         .avail_size = avail_size1,
2120         .add_internal_bitmap = add_internal_bitmap1,
2121         .locate_bitmap = locate_bitmap1,
2122         .write_bitmap = write_bitmap1,
2123         .free_super = free_super1,
2124 #if __BYTE_ORDER == BIG_ENDIAN
2125         .swapuuid = 0,
2126 #else
2127         .swapuuid = 1,
2128 #endif
2129         .name = "1.x",
2130 };