3 * Flush out the given cluster with a cluster head.
4 * We "know" there is room in the cluster head and
8 #define _FILE_OFFSET_BITS 64
10 #include <lafs/lafs.h>
17 /*-----------------------------------------------------------------------
18 * A segment is divided up in a slightly complicated way into
19 * tables, rows and columns. This allows us to align writes with
20 * stripes in a raid4 array or similar.
21 * So we have some support routines to help track our way around
24 * A write cluster always comprises a whole number of rows - it is padded
25 * with zeros if necessary.
26 * However block addressing follows columns, so the blocks written to
27 * a cluster may well not be contiguously addressed.
29 * There are 'width' blocks (or columns) in a row, and either:
30 * - segment_stride rows in a table, if segment_stride < segment_size or
31 * - one table per segment, when segment_stride > segment_size.
32 * These are calculated and summarised in rows_per_table and tables_per_seg
33 * in the 'struct lafs_device' structure.
35 * A 'seg_pos' records the current segment and where we are in it.
37 * dev,num to identify the segment
38 * st_table,st_row to identify where the cluster starts.
39 * nxt_table,nxt_row to identify where the cluster ends.
40 * table,row,col to identify where the next block will be found
43 static int seg_remainder(struct lafs *fs, struct lafs_segpos *seg)
45 /* return the number of blocks from the start of segpos to the
47 * i.e. remaining rows in this table, plus remaining tables in
50 struct lafs_device *dv = dev_by_num(fs, seg->dev);
51 int rows = dv->rows_per_table - seg->st_row;
53 if (seg->dev < 0) abort();
54 rows += dv->rows_per_table * (dv->tables_per_seg - seg->st_table - 1);
55 return rows * dv->width;
59 static void seg_step(struct lafs *fs, struct lafs_segpos *seg)
/* Advance the segment position: the previous cluster's end ('nxt')
 * becomes the new start, and the walking cursor (table/row) is rewound
 * to that start.
 * NOTE(review): lines are elided from this view (opening brace, tail of
 * the function) — presumably seg->col is also reset to 0 here; verify
 * against the full source before relying on column state after a step.
 */
61 /* reposition this segpos to be immediately after its current end
62 * and make the 'current' point be the start.
65 seg->st_table = seg->nxt_table;
66 seg->st_row = seg->nxt_row;
67 seg->table = seg->st_table;
68 seg->row = seg->st_row;
73 static u32 seg_setsize(struct lafs *fs, struct lafs_segpos *seg, u32 size)
/* Set the cluster's end ('nxt' table/row) to be 'size' blocks beyond the
 * current start.  Returns 'size' rounded up to a whole number of rows
 * (a multiple of dv->width), i.e. the space actually consumed.
 */
75 /* move the 'nxt' table/row to be 'size' blocks beyond
76 * current start. size will be rounded up to a multiple
79 struct lafs_device *dv = dev_by_num(fs, seg->dev);
83 if (seg->dev < 0) abort();
84 rows = (size + dv->width - 1) / dv->width; /* round up to whole rows */
85 rv = rows * dv->width; /* padded size actually reserved */
/* NOTE(review): a line is elided here — presumably "rows += seg->st_row;"
 * so that nxt_table/nxt_row are computed relative to the segment origin
 * rather than the cluster start; confirm against the full source.
 */
87 seg->nxt_table = seg->st_table + rows / dv->rows_per_table;
88 seg->nxt_row = rows % dv->rows_per_table;
92 static u64 seg_addr(struct lafs *fs, struct lafs_segpos *seg)
/* Return the virtual (device-space) address of the block the cursor
 * (table,row,col) currently points at.  Columns map to strides, tables
 * to groups of rows, and the segment number to segment_stride — so
 * consecutive cluster blocks are NOT contiguously addressed.
 */
94 /* Return the virtual address of the blocks pointed
97 struct lafs_device *dv = dev_by_num(fs, seg->dev);
/* NOTE(review): elided lines here apparently handle the case where this
 * is called past the cluster end (per the comment below, returning a
 * sentinel for the last cluster of a cleaner segment); verify.
 */
101 /* Setting 'next' address for last cluster in
102 * a cleaner segment */
104 addr = seg->col * dv->stride;
/* NOTE(review): a line is elided between these two additions — presumably
 * "addr += seg->row;" — and the device base offset may be added after
 * line 107; confirm against the full source.
 */
106 addr += seg->table * dv->rows_per_table;
107 addr += seg->num * dv->segment_stride;
112 static u64 seg_next(struct lafs *fs, struct lafs_segpos *seg)
/* Return the address of the current block, then advance the cursor by
 * one block: along the column first, wrapping to the next row when the
 * column passes dv->width, and to the next table when the row passes
 * dv->rows_per_table.  The wrap-around bodies are elided from this view.
 */
114 /* step forward one block, returning the address of
115 * the block stepped over
117 struct lafs_device *dv = dev_by_num(fs, seg->dev);
118 u64 addr = seg_addr(fs, seg); /* address handed back to the caller */
120 /* now step forward in column or table or seg */
122 if (seg->col >= dv->width) {
125 if (seg->row >= dv->rows_per_table) {
133 /*-------------------------------------------------------------------------
134 * Cluster head building.
135 * We build the cluster head bit by bit as we find blocks
136 * in the list. These routines help.
139 static void cluster_addhead(struct lafs_cluster *wc, struct lafs_ino *ino,
141 struct group_head **headstart)
/* Append a new group_head for inode 'ino' at the current end of the
 * in-memory cluster head, returning its location via *headstart so
 * cluster_closehead() can later record the group's final size.
 * All on-disk fields are stored little-endian.
 */
143 struct group_head *gh = (void*)((char *)wc->chead +
148 gh->inum = __cpu_to_le32(ino->inum);
149 gh->fsnum = __cpu_to_le32(ino->filesys->inum);
/* tnf packs the inode generation (high bits) with the truncation
 * generation (low byte); the continuation of this expression (and any
 * flag bit) is elided from this view.
 */
151 tnf = ((ino->generation<<8) | (ino->trunc_gen & 0xff))
155 gh->truncatenum_and_flag = __cpu_to_le16(tnf);
156 wc->chead_size += sizeof(struct group_head); /* reserve the header space */
159 static void cluster_closehead(struct lafs_cluster *wc,
160 struct group_head *headstart)
162 int size = wc->chead_size - (((char *)headstart) - (char *)wc->chead);
164 headstart->group_size_words = size / 4;
168 static void cluster_addmini(struct lafs_cluster *wc, u32 addr, int offset,
169 int size, const char *data,
170 int size2, const char *data2)
/* Append a miniblock (a small in-head data update for block 'addr' at
 * byte 'offset') to the cluster head.  The payload may be split across
 * two buffers: (size-size2) bytes at 'data' and size2 bytes at 'data2'.
 * The payload is zero-padded to a 4-byte boundary.
 */
172 /* if size2 !=0, then only
173 * (size-size2) is at 'data' and the rest is at 'data2'
175 struct miniblock *mb = ((struct miniblock *)
176 ((char *)wc->chead + wc->chead_size));
178 mb->block_num = __cpu_to_le32(addr);
179 mb->block_offset = __cpu_to_le16(offset);
180 mb->length = __cpu_to_le16(size + DescMiniOffset); /* tag as miniblock */
181 wc->chead_size += sizeof(struct miniblock);
182 memcpy(mb->data, data, size-size2);
/* NOTE(review): a line is elided before this copy — presumably an
 * "if (size2)" guard; without it, data2 == NULL with size2 == 0 would be
 * technically undefined for memcpy.  Verify against the full source.
 */
184 memcpy(mb->data + size-size2, data2, size2);
185 memset(mb->data+size, 0, (-size)&3); /* pad payload to 4-byte multiple */
186 wc->chead_size += ROUND_UP(size);
190 static void cluster_adddesc(struct lafs_cluster *wc, struct lafs_blk *blk,
191 struct descriptor **desc_start)
193 struct descriptor *dh = (struct descriptor *)((char *)wc->chead +
196 dh->block_num = __cpu_to_le32(blk->fileaddr);
197 dh->block_cnt = __cpu_to_le32(0);
199 if (blk->flags && B_Index)
200 dh->block_bytes = DescIndex;
201 wc->chead_size += sizeof(struct descriptor);
204 static void cluster_incdesc(struct lafs_cluster *wc, struct descriptor *desc_start,
205 struct lafs_blk *b, int blksz)
/* Account one more block under the current descriptor: bump the
 * little-endian block count and record how many bytes of the block are
 * valid.  Index blocks keep the DescIndex tag set by cluster_adddesc();
 * the last block of a regular file records only the bytes up to the
 * file size, and all other blocks record a full blksz.
 */
207 desc_start->block_cnt =
208 __cpu_to_le32(__le32_to_cpu(desc_start->block_cnt)+1);
209 if (!(b->flags & B_Index)) {
210 if (b->ino->type >= TypeBase) {
/* Regular file: does this block contain the EOF? */
211 u64 size = b->ino->md.file.size;
212 if (size > ((loff_t)b->fileaddr * blksz) &&
213 size <= ((loff_t)(b->fileaddr + 1) * blksz))
/* NOTE(review): when size is an exact multiple of blksz the '<=' above
 * matches, yet "size & (blksz-1)" evaluates to 0 rather than blksz —
 * looks like a boundary bug; confirm intended on-disk semantics.
 * (blksz is assumed to be a power of two for this mask to work.)
 */
214 desc_start->block_bytes =
215 __cpu_to_le32(size & (blksz-1));
/* The 'else' arms (elided lines) record a full block. */
217 desc_start->block_bytes =
218 __cpu_to_le32(blksz);
220 desc_start->block_bytes = __cpu_to_le32(blksz);
225 int lafs_calc_cluster_csum(struct cluster_head *head)
/* Compute the little-endian CRC32 of the cluster head over its Hlength
 * bytes, with the checksum field itself excluded from the calculation:
 * the current value is saved, presumably zeroed on an elided line before
 * the crc32() call (verify against the full source), and restored
 * before returning.  'head' is temporarily modified but left unchanged.
 */
227 unsigned int oldcsum = head->checksum;
231 csum = crc32(0, (uint32_t *)head, __le16_to_cpu(head->Hlength));
232 head->checksum = oldcsum; /* restore caller-visible state */
233 return __cpu_to_le32(csum);
236 void lafs_cluster_flush(struct lafs *fs, int cnum)
/* Flush write-cluster 'cnum': build the cluster head (group heads and
 * block descriptors) from the queued dirty blocks, allocate on-disk
 * addresses for head and data blocks, then write everything out.
 * The tail of this function extends beyond the visible extract.
 */
238 char chead_buf[4096];
239 struct cluster_head *ch;
240 struct lafs_cluster *wc = &fs->wc[cnum];
245 struct lafs_ino *current_inode = NULL;
246 off_t current_block = NoBlock;
/* NOTE(review): desc_start has no visible initializer but is read in the
 * '== NULL' test below before any assignment — if no elided line sets it
 * to NULL, the first-iteration read is undefined behavior.  Verify.
 */
247 struct descriptor *desc_start;
248 struct group_head *head_start = NULL;
249 struct lafs_device *dv;
251 trace(1, "cluster flush\n");
/* Nothing queued and no checkpoint to terminate: nothing to write. */
252 if (list_empty(&wc->blocks) &&
253 !(fs->checkpointing & CHECKPOINT_END)) {
254 trace(1, "...skipped\n");
/* Initialise the in-memory cluster head (one block to start with). */
258 ch = (void*)chead_buf;
260 wc->chead = chead_buf;
261 wc->chead_size = sizeof(struct cluster_head);
262 wc->chead_blocks = 1;
263 memcpy(ch->idtag, "LaFSHead", 8);
264 memcpy(ch->uuid, fs->uuid, 16);
265 ch->seq = __cpu_to_le64(fs->wc[cnum].seq);
/* Trim the cluster to just the space actually needed in the segment. */
268 cluster_size = seg_setsize(fs, &wc->seg,
269 seg_remainder(fs, &wc->seg) - wc->remaining);
271 /* find, and step over, address header block(s) */
272 for (i = 0; i < wc->chead_blocks ; i++)
273 head_addr[i] = seg_next(fs, &wc->seg);
/* Checkpoint state is carried only on cluster stream 0. */
276 if (cnum == 0 && fs->checkpointing) {
277 ch->flags = __cpu_to_le32(fs->checkpointing);
278 if (fs->checkpointing & CHECKPOINT_END)
279 fs->checkpointing = 0;
280 else if (fs->checkpointing & CHECKPOINT_START) {
281 fs->checkpoint_cluster = head_addr[0];
282 fs->checkpointing &= ~CHECKPOINT_START;
/* Walk the queued blocks: one group_head per inode, one descriptor per
 * run of contiguous file addresses (index blocks always start a new
 * descriptor), and allocate each block's on-disk address in order.
 */
286 list_for_each_entry(b, &wc->blocks, leafs) {
287 if (b->ino != current_inode) {
288 /* need to create a new group_head */
291 cluster_closehead(wc, head_start);
292 cluster_addhead(wc, b->ino, cnum, &head_start);
293 current_inode = b->ino;
294 current_block = NoBlock;
296 if (desc_start == NULL || b->fileaddr != current_block+1 ||
297 (b->flags & B_Index)) {
298 cluster_adddesc(wc, b, &desc_start);
299 current_block = b->fileaddr;
302 cluster_incdesc(wc, desc_start, b, fs->blocksize);
304 lafs_allocated_block(b, seg_next(fs, &wc->seg));
/* Close the last group and fill in the remaining head fields. */
307 cluster_closehead(wc, head_start);
309 ch->Hlength = __cpu_to_le16(wc->chead_size);
310 ch->Clength = __cpu_to_le16(cluster_size);
311 ch->verify_type = VerifyNull;
/* Link the cluster chain: next is where the segment cursor now points,
 * prev is the previous head, this is our own first head block.
 */
313 ch->next_addr = __cpu_to_le64(seg_addr(fs, &wc->seg));
314 ch->prev_addr = __cpu_to_le64(wc->prev_addr);
315 wc->prev_addr = head_addr[0];
316 ch->this_addr = __cpu_to_le64(wc->prev_addr);
317 ch->checksum = lafs_calc_cluster_csum(ch);
/* Write the head block(s), then each data block, to the device.
 * NOTE(review): "§" below is mojibake for "&sect" (an extraction
 * artifact) — restore before compiling.  Also: lseek64()/write() return
 * values are unchecked, and all writes go to dv (wc->seg.dev) even
 * though virttophys() reports a device — confirm single-device
 * assumption or honour the returned dev.
 */
319 dv = dev_by_num(fs, wc->seg.dev);
321 for (i = 0; i < wc->chead_blocks; i++) {
324 virttophys(fs, head_addr[i], &dev, §);
325 lseek64(dv->fd, sect, SEEK_SET);
326 write(dv->fd, chead_buf+ i * fs->blocksize, fs->blocksize);
/* Drain the block list, clearing dirty state as each block is written. */
328 while (!list_empty(&wc->blocks)) {
332 b = list_first_entry(&wc->blocks, struct lafs_blk, leafs);
333 list_del_init(&b->leafs);
335 virttophys(fs, b->physaddr, &dev, §);
336 lseek64(dv->fd, sect, SEEK_SET);
337 b->flags &= ~B_Dirty;
338 write(dv->fd, b->data, fs->blocksize);