 * Flush out the given cluster with a cluster head.
 * We "know" there is room in the cluster head and
 */

#define _FILE_OFFSET_BITS 64

#include <lafs/lafs.h>
/*-----------------------------------------------------------------------
 * A segment is divided up in a slightly complicated way into
 * tables, rows and columns.  This allows us to align writes with
 * stripes in a raid4 array or similar.
 * So we have some support routines to help track our way around
 * the segment.
 *
 * A write cluster always comprises a whole number of rows - it is padded
 * with zeros if necessary.
 * However block addressing follows columns, so the blocks written to
 * a cluster may well not be contiguously addressed.
 *
 * There are 'width' blocks (or columns) in a row, and either:
 *  - segment_stride rows in a table, if segment_stride < segment_size, or
 *  - one table per segment, when segment_stride > segment_size.
 * These are calculated and summarised in rows_per_table and tables_per_seg
 * in the 'struct lafs_device' structure.
 *
 * A 'seg_pos' records the current segment and where we are in it:
 *   dev,num to identify the segment
 *   st_table,st_row to identify where the cluster starts
 *   nxt_table,nxt_row to identify where the cluster ends
 *   table,row,col to identify where the next block will be found
 */
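/*
 * Illustrative example (not part of the original description; the numbers
 * are arbitrary): with width = 4, rows_per_table = 8 and tables_per_seg = 2,
 * one table holds 4 * 8 = 32 blocks and a segment holds 2 * 32 = 64 blocks.
 * A cluster of 10 blocks is padded to 12 blocks (3 whole rows of 4).
 * Because addressing follows columns, the three blocks written to column 0
 * of those rows get consecutive virtual addresses, while blocks sitting
 * next to each other in the same row do not.
 */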
static int seg_remainder(struct lafs *fs, struct lafs_segpos *seg)
{
	/* return the number of blocks from the start of segpos to the
	 * end of the segment,
	 * i.e. remaining rows in this table, plus remaining tables in
	 * this segment converted to rows, all times the width.
	 */
	struct lafs_device *dv = dev_by_num(fs, seg->dev);
	int rows = dv->rows_per_table - seg->st_row;

	if (seg->dev < 0) abort();
	rows += dv->rows_per_table * (dv->tables_per_seg - seg->st_table - 1);
	return rows * dv->width;
}
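/*
 * For illustration only (arbitrary numbers): with rows_per_table = 8,
 * tables_per_seg = 2 and width = 4, a segpos sitting at st_table = 0,
 * st_row = 3 has (8 - 3) + 8 * (2 - 0 - 1) = 13 rows left, so
 * seg_remainder() returns 13 * 4 = 52 blocks.
 */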
static void seg_step(struct lafs *fs, struct lafs_segpos *seg)
{
	/* reposition this segpos to be immediately after its current end
	 * and make the current position be the start.
	 */
	seg->st_table = seg->nxt_table;
	seg->st_row = seg->nxt_row;
	seg->table = seg->st_table;
	seg->row = seg->st_row;
}
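/*
 * A plausible calling pattern (a sketch only, not taken from this file):
 * once a cluster has been written, the writer would step past it and size
 * up the next one, e.g.
 *
 *	seg_step(fs, &wc->seg);
 *	seg_setsize(fs, &wc->seg, blocks_wanted);
 *
 * where 'blocks_wanted' is a hypothetical count.  Note that seg_step()
 * only moves the start markers; it does not check whether the segment
 * has been exhausted.
 */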
static u32 seg_setsize(struct lafs *fs, struct lafs_segpos *seg, u32 size)
{
	/* move the 'nxt' table/row to be 'size' blocks beyond
	 * current start.  size will be rounded up to a multiple
	 * of the row width, and the rounded-up size is returned.
	 */
	struct lafs_device *dv = dev_by_num(fs, seg->dev);
	u32 rows, rv;

	if (seg->dev < 0) abort();
	rows = (size + dv->width - 1) / dv->width;
	rv = rows * dv->width;

	seg->nxt_table = seg->st_table + rows / dv->rows_per_table;
	seg->nxt_row = rows % dv->rows_per_table;
	return rv;
}
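/*
 * Example of the rounding (values are arbitrary): with width = 4 and
 * rows_per_table = 8, a request of size = 10 gives
 *	rows = (10 + 4 - 1) / 4 = 3
 *	rv   = 3 * 4          = 12
 * so nxt_row becomes 3 % 8 = 3 and the caller is told the cluster will
 * occupy 12 blocks.
 */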
static u64 seg_addr(struct lafs *fs, struct lafs_segpos *seg)
{
	/* Return the virtual address of the block pointed to by 'seg'. */
	struct lafs_device *dv = dev_by_num(fs, seg->dev);
	u64 addr;

	if (seg->dev < 0)
		/* Setting 'next' address for last cluster in
		 * a cleaner segment */
		return 0;

	addr = seg->col * dv->stride;
	addr += seg->table * dv->rows_per_table;
	addr += seg->num * dv->segment_stride;
	return addr;
}
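/*
 * Worked example (arbitrary values): with stride = 8, rows_per_table = 16
 * and segment_stride = 1024, a block in column 2 of table 1 of segment
 * number 3 contributes
 *	addr = 2 * 8 + 1 * 16 + 3 * 1024 = 3104
 * Only the terms visible above are shown; further terms (for the row
 * within the table, or a per-device offset) may contribute in the full
 * code.
 */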
static u64 seg_next(struct lafs *fs, struct lafs_segpos *seg)
{
	/* step forward one block, returning the address of
	 * the block stepped over */
	struct lafs_device *dv = dev_by_num(fs, seg->dev);
	u64 addr = seg_addr(fs, seg);

	/* now step forward in column or table or seg */
	seg->col++;
	if (seg->col >= dv->width) {
		seg->col = 0;
		seg->row++;
		if (seg->row >= dv->rows_per_table) {
			seg->row = 0;
			seg->table++;
		}
	}
	return addr;
}
/*-------------------------------------------------------------------------
 * Cluster head building.
 * We build the cluster head bit by bit as we find blocks
 * in the list.  These routines help.
 */
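/*
 * Rough shape of a cluster head as these helpers build it (a sketch
 * inferred from the routines below, not a formal layout definition):
 *
 *	struct cluster_head			<- fixed header
 *	  struct group_head  (inode A)		<- cluster_addhead()
 *	    struct descriptor ...		<- cluster_adddesc()/cluster_incdesc()
 *	    struct miniblock + inline data ...	<- cluster_addmini()
 *	  struct group_head  (inode B)
 *	    ...
 *
 * cluster_closehead() runs when a group is finished and records its
 * length in group_size_words.
 */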
static void cluster_addhead(struct lafs_cluster *wc, struct lafs_ino *ino,
			    int cnum, struct group_head **headstart)
{
	struct group_head *gh = (void*)((char *)wc->chead + wc->chead_size);
	u16 tnf;

	*headstart = gh;
	gh->inum = __cpu_to_le32(ino->inum);
	gh->fsnum = __cpu_to_le32(ino->filesys->inum);
	tnf = ((ino->generation<<8) | (ino->trunc_gen & 0xff));
	gh->truncatenum_and_flag = __cpu_to_le16(tnf);
	wc->chead_size += sizeof(struct group_head);
}
static void cluster_closehead(struct lafs_cluster *wc,
			      struct group_head *headstart)
{
	int size = wc->chead_size - (((char *)headstart) - (char *)wc->chead);

	headstart->group_size_words = __cpu_to_le16(size / 4);
}
static void cluster_addmini(struct lafs_cluster *wc, u32 addr, int offset,
			    int size, const char *data,
			    int size2, const char *data2)
{
	/* if size2 != 0, then only
	 * (size-size2) is at 'data' and the rest is at 'data2'
	 */
	struct miniblock *mb = ((struct miniblock *)
				((char *)wc->chead + wc->chead_size));

	mb->block_num = __cpu_to_le32(addr);
	mb->block_offset = __cpu_to_le16(offset);
	mb->length = __cpu_to_le16(size + DescMiniOffset);
	wc->chead_size += sizeof(struct miniblock);
	memcpy(mb->data, data, size-size2);
	if (size2)
		memcpy(mb->data + size-size2, data2, size2);
	memset(mb->data + size, 0, (-size) & 3);
	wc->chead_size += ROUND_UP(size);
}
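/*
 * Usage sketch (hypothetical values and buffer names): a 10-byte update
 * whose bytes are split across two buffers could be recorded as
 *
 *	cluster_addmini(wc, addr, offset, 10, bufa, 4, bufb);
 *
 * which copies the first 6 bytes from 'bufa', the remaining 4 from 'bufb',
 * then zero-pads 2 bytes so the inline data ends on a 4-byte boundary
 * (presumably ROUND_UP(10) = 12 bytes are accounted in chead_size).
 */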
static void cluster_adddesc(struct lafs_cluster *wc, struct lafs_blk *blk,
			    struct descriptor **desc_start)
{
	struct descriptor *dh = (struct descriptor *)((char *)wc->chead +
						      wc->chead_size);

	*desc_start = dh;
	dh->block_num = __cpu_to_le32(blk->fileaddr);
	dh->block_cnt = __cpu_to_le32(0);
	if (blk->flags & B_Index)
		dh->block_bytes = __cpu_to_le32(DescIndex);
	wc->chead_size += sizeof(struct descriptor);
}
static void cluster_incdesc(struct lafs_cluster *wc, struct descriptor *desc_start,
			    struct lafs_blk *b, int blksz)
{
	desc_start->block_cnt =
		__cpu_to_le32(__le32_to_cpu(desc_start->block_cnt)+1);
	if (!(b->flags & B_Index)) {
		if (b->ino->type >= TypeBase) {
			u64 size = b->ino->md.file.size;
			if (size > ((loff_t)b->fileaddr * blksz) &&
			    size <= ((loff_t)(b->fileaddr + 1) * blksz))
				desc_start->block_bytes =
					__cpu_to_le32(size & (blksz-1));
			else
				desc_start->block_bytes =
					__cpu_to_le32(blksz);
		} else
			desc_start->block_bytes = __cpu_to_le32(blksz);
	}
}
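/*
 * Worked example (arbitrary numbers): with blksz = 4096, a regular file
 * of 10000 bytes occupies blocks 0, 1 and 2.  For fileaddr = 2,
 * 10000 > 2 * 4096 (8192) and 10000 <= 3 * 4096 (12288), so the
 * descriptor records block_bytes = 10000 & 4095 = 1808, the bytes that
 * are valid in the final, partial block.  Blocks 0 and 1 fall outside
 * that window and record the full 4096.
 */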
int lafs_calc_cluster_csum(struct cluster_head *head)
{
	unsigned int oldcsum = head->checksum;
	unsigned long long newcsum = 0;
	unsigned int csum;
	int i;
	unsigned int *superc = (unsigned int *) head;

	head->checksum = 0;
	for (i = 0; i < __le16_to_cpu(head->Hlength)/4; i++)
		newcsum += __le32_to_cpu(superc[i]);
	csum = (newcsum & 0xffffffff) + (newcsum>>32);
	head->checksum = oldcsum;
	return __cpu_to_le32(csum);
}
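/*
 * The checksum is a 64-bit sum of the head's 32-bit words (read as
 * little-endian, over Hlength bytes, with the checksum field itself
 * treated as zero) folded once into 32 bits.  A reader could verify a
 * cluster head it has just loaded with a sketch like the following;
 * this check is not part of this file:
 *
 *	if (lafs_calc_cluster_csum(ch) != ch->checksum)
 *		return -1;	(corrupt or torn cluster head)
 */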
void lafs_cluster_flush(struct lafs *fs, int cnum)
{
	char chead_buf[4096];
	struct cluster_head *ch;
	struct lafs_cluster *wc = &fs->wc[cnum];
	struct lafs_ino *current_inode = NULL;
	off_t current_block = NoBlock;
	struct descriptor *desc_start = NULL;
	struct group_head *head_start = NULL;
	struct lafs_device *dv;

	trace(1, "cluster flush\n");
	if (list_empty(&wc->blocks) &&
	    !(fs->checkpointing & CHECKPOINT_END)) {
		trace(1, "...skipped\n");
		return;
	}
	ch = (void*)chead_buf;

	wc->chead = chead_buf;
	wc->chead_size = sizeof(struct cluster_head);
	wc->chead_blocks = 1;
	memcpy(ch->idtag, "LaFSHead", 8);
	memcpy(ch->uuid, fs->uuid, 16);
	ch->seq = __cpu_to_le64(fs->wc[cnum].seq);

	cluster_size = seg_setsize(fs, &wc->seg,
				   seg_remainder(fs, &wc->seg) - wc->remaining);
	/* find, and step over, address header block(s) */
	for (i = 0; i < wc->chead_blocks; i++)
		head_addr[i] = seg_next(fs, &wc->seg);

	if (cnum == 0 && fs->checkpointing) {
		ch->flags = __cpu_to_le32(fs->checkpointing);
		if (fs->checkpointing & CHECKPOINT_END)
			fs->checkpointing = 0;
		else if (fs->checkpointing & CHECKPOINT_START) {
			fs->checkpoint_cluster = head_addr[0];
			fs->checkpointing &= ~CHECKPOINT_START;
		}
	}
	list_for_each_entry(b, &wc->blocks, leafs) {
		if (b->ino != current_inode) {
			/* need to create a new group_head */
			if (head_start)
				cluster_closehead(wc, head_start);
			cluster_addhead(wc, b->ino, cnum, &head_start);
			current_inode = b->ino;
			current_block = NoBlock;
		}
		if (desc_start == NULL || b->fileaddr != current_block+1 ||
		    (b->flags & B_Index)) {
			cluster_adddesc(wc, b, &desc_start);
			current_block = b->fileaddr;
		}
		cluster_incdesc(wc, desc_start, b, fs->blocksize);

		lafs_allocated_block(b, seg_next(fs, &wc->seg));
	}
	if (head_start)
		cluster_closehead(wc, head_start);
	ch->Hlength = __cpu_to_le16(wc->chead_size);
	ch->Clength = __cpu_to_le16(cluster_size);
	ch->verify_type = VerifyNull;

	ch->next_addr = __cpu_to_le64(seg_addr(fs, &wc->seg));
	ch->prev_addr = __cpu_to_le64(wc->prev_addr);
	wc->prev_addr = head_addr[0];
	ch->this_addr = __cpu_to_le64(wc->prev_addr);
	ch->checksum = lafs_calc_cluster_csum(ch);

	dv = dev_by_num(fs, wc->seg.dev);

	for (i = 0; i < wc->chead_blocks; i++) {
		virttophys(fs, head_addr[i], &dev, &sect);
		lseek64(dv->fd, sect, SEEK_SET);
		write(dv->fd, chead_buf + i * fs->blocksize, fs->blocksize);
	}
	while (!list_empty(&wc->blocks)) {
		b = list_first_entry(&wc->blocks, struct lafs_blk, leafs);
		list_del_init(&b->leafs);

		virttophys(fs, b->physaddr, &dev, &sect);
		lseek64(dv->fd, sect, SEEK_SET);
		b->flags &= ~B_Dirty;
		write(dv->fd, b->data, fs->blocksize);