return CARRY_ON;
}
diff -urN v2.4.19p7/fs/reiserfs/do_balan.c linux/fs/reiserfs/do_balan.c
--- v2.4.19p7/fs/reiserfs/do_balan.c Mon Apr 1 13:24:15 2002
+++ linux/fs/reiserfs/do_balan.c Wed May 1 03:54:31 2002
@@ -437,24 +437,13 @@
ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
l_n,body, zeros_num > l_n ? l_n : zeros_num
);
-
+#if 0
RFALSE( l_n &&
is_indirect_le_ih(B_N_PITEM_HEAD
(tb->L[0],
n + item_pos - ret_val)),
"PAP-12110: pasting more than 1 unformatted node pointer into indirect item");
-
- /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
- {
- int version;
-
- version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
- set_le_key_k_offset (version, B_N_PKEY (tbS0, 0),
- le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + l_n);
- version = ih_version (B_N_PITEM_HEAD(tb->CFL[0],tb->lkey[0]));
- set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
- le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + l_n);
- }
+#endif
/* Calculate new body, position in item and insert_size[0] */
if ( l_n > zeros_num ) {
@@ -522,7 +511,7 @@
);
/* if appended item is indirect item, put unformatted node into un list */
if (is_indirect_le_ih (pasted))
- set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace);
+ set_ih_free_space (pasted, 0);
tb->insert_size[0] = 0;
zeros_num = 0;
}
@@ -550,7 +539,7 @@
{ /* new item or its part falls to R[0] */
if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
{ /* part of new item falls into R[0] */
- int old_key_comp, old_len, r_zeros_number;
+ loff_t old_key_comp, old_len, r_zeros_number;
const char * r_body;
int version;
loff_t offset;
@@ -692,12 +681,17 @@
-// it is called by get_block when create == 0. Returns block number
-// for 'block'-th logical block of file. When it hits direct item it
-// returns 0 (being called from bmap) or read direct item into piece
-// of page (bh_result)
+/*
+** Get the block number from the indirect item at the given position.
+*/
+static inline long iitem_get_blocknr (struct path *path, int pos)
+{
+ struct buffer_head * bh = get_last_bh (path);
+ struct item_head * ih = get_ih (path);
+ __u32 * ind_item;
-// Please improve the english/clarity in the comment above, as it is
-// hard to understand.
+ if (is_indirect_le_ih (ih)) {
+ ind_item = (__u32 *)B_I_PITEM (bh, ih);
+ return le32_to_cpu(ind_item [path->pos_in_item + pos]);
+ }
-static int _get_block_create_0 (struct inode * inode, long block,
- struct buffer_head * bh_result,
- int args)
+ return 0;
+}
+
+/*
+** Get the number of unformatted node pointers left in the
+** indirect item past the current position.
+*/
+static inline int iitem_size (struct path *path)
{
- INITIALIZE_PATH (path);
- struct cpu_key key;
- struct buffer_head * bh;
- struct item_head * ih, tmp_ih;
- int fs_gen ;
- int blocknr;
- char * p = NULL;
- int chars;
- int ret ;
- int done = 0 ;
- unsigned long offset ;
+ struct item_head * ih = get_ih (path);
+ return (I_UNFM_NUM(ih) - (path->pos_in_item + 1));
+}
- // prepare the key to look for the 'block'-th block of file
- make_cpu_key (&key, inode,
- (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3);
+/*
+** Return "1" if last position of the indirect item reached,
+** "0" - otherwise.
+*/
+static inline int last_pos_of_iitem (struct path *path, int pos)
+{
+ struct item_head * ih = get_ih (path);
+ return ((path->pos_in_item + 1 + pos) >= (I_UNFM_NUM(ih)) ? 1 : 0);
+}
-research:
- if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) {
- pathrelse (&path);
- if (p)
- kunmap(bh_result->b_page) ;
- // We do not return -ENOENT if there is a hole but page is uptodate, because it means
- // That there is some MMAPED data associated with it that is yet to be written to disk.
- if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
- return -ENOENT ;
- }
- return 0 ;
+/*
+** Get the number of contiguous blocks in the indirect item
+** from the given pos to the end of the item.
+*/
+static inline int iitem_amount_contiguous (struct path *path, int pos)
+{
+ long curr = 0;
+ long next = 0;
+ int item_size = iitem_size(path);
+ int amount = 1;
+
+ if (pos >= item_size) {
+ return 0;
+ }
+ curr = iitem_get_blocknr(path, pos++);
+
+ if (curr==0) {
+ while (pos <= item_size) {
+ next = iitem_get_blocknr(path, pos++);
+ if (next != 0) break;
+ amount++;
+ }
+ return amount;
+ }
+
+ while (pos <= item_size) {
+ next = iitem_get_blocknr(path, pos++);
+ if ((next - curr) != 1) break;
+ curr = next;
+ amount++;
+ }
+
+ return amount;
+}
+
+/*
+** Return "1" if fs changed and item moved.
+*/
+static inline int need_research (int fs_gen, struct super_block * sb,
+ struct item_head * ih, struct path * path )
+{
+ return (fs_changed(fs_gen, sb) && item_moved(ih, path));
+}
+
+/* Fill the indirect item cache.
+** Store runs of contiguous block numbers from the current indirect item(s).
+*/
+static inline void iicache_fill (struct inode * inode, long block,
+ struct path * path, struct cpu_key * key)
+{
+ long blocknr=0, blk=block;
+ int pos=0;
+ int amount=0,i=0;
+ int asize = 8;
+ int iic_size = (sizeof(struct iicache)) * asize;
+ struct super_block * sb = inode->i_sb;
+ struct item_head * ih = get_ih (path);
+
+ if (inode->u.reiserfs_i.iic==NULL) {
+ inode->u.reiserfs_i.iic = (struct iicache *)kmalloc(iic_size, GFP_NOFS);
+ if (inode->u.reiserfs_i.iic==NULL) {
+ return;
+ }
+ iicache_set_asize(inode, asize);
+ }
+ iicache_clear(inode);
+
+ if (search_for_position_by_key (sb, key, path) != POSITION_FOUND) {
+ return;
+ }
+
+ for (i=0; i<iicache_get_asize(inode); i++) {
+
+ amount = iitem_amount_contiguous (path, pos);
+ blocknr = iitem_get_blocknr (path, pos);
+
+ if ((amount>0) && (amount<=1012)) {
+ iicache_set (inode, amount, IICACHE_SIZE, i);
+ iicache_set (inode, blk, IICACHE_BLOCK, i);
+ iicache_set (inode, blocknr, IICACHE_BLOCKNR,i);
+ } else {
+ break;
}
-
- //
- bh = get_last_bh (&path);
- ih = get_ih (&path);
- if (is_indirect_le_ih (ih)) {
- __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
+
+ pos += amount;
+ blk += amount;
+
+    if (!last_pos_of_iitem(path, pos)) continue;
+
+ if((blk * sb->s_blocksize) < inode->i_size) {
+ if ((i+1) < iicache_get_asize(inode)) {
+ set_cpu_key_k_offset (key, cpu_key_k_offset(key) + pos * sb->s_blocksize);
+
+ if (search_for_position_by_key (sb, key, path) != POSITION_FOUND) {
+ break;
+ }
+
+ ih = get_ih (path);
+ if (!is_indirect_le_ih(ih) ||
+ (le_ih_k_offset(ih) + path->pos_in_item) > inode->i_size) {
+ break ;
+ }
+ pos=0; amount=0;
- /* FIXME: here we could cache indirect item or part of it in
- the inode to avoid search_by_key in case of subsequent
- access to file */
- blocknr = get_block_num(ind_item, path.pos_in_item) ;
- ret = 0 ;
- if (blocknr) {
- bh_result->b_dev = inode->i_dev;
- bh_result->b_blocknr = blocknr;
- bh_result->b_state |= (1UL << BH_Mapped);
- } else
- // We do not return -ENOENT if there is a hole but page is uptodate, because it means
- // That there is some MMAPED data associated with it that is yet to be written to disk.
- if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
- ret = -ENOENT ;
- }
+ }
+ }
+ }
- pathrelse (&path);
- if (p)
- kunmap(bh_result->b_page) ;
- return ret ;
+ if (i < iicache_get_asize(inode)) {
+ iicache_clear_from_pos(inode, i);
+ }
+
+}
+
+/*
+** Truncate indirect item cache.
+*/
+static inline void iicache_truncate (struct inode * inode)
+{
+ long new_file_end = inode->i_size >> inode->i_blkbits;
+ long slot=0, amount=0, blk=0;
+
+ if (inode->u.reiserfs_i.iic==NULL) return;
+
+ if (new_file_end==0) {
+ iicache_clear(inode);
+ }
+
+ slot = block_is_iicached(inode, new_file_end);
+ if (slot > 0) {
+ slot--;
+ blk = iicache_get (inode, IICACHE_BLOCK, slot);
+ amount = new_file_end - blk;
+ if (amount > 0) {
+ iicache_set (inode, amount, IICACHE_SIZE, slot);
+ } else {
+ iicache_set (inode, 0, IICACHE_SIZE, slot);
+ iicache_set (inode, 0, IICACHE_BLOCK, slot);
+ iicache_set (inode, 0, IICACHE_BLOCKNR, slot);
}
+ }
+
+}
+
+
+/*
+** Helper function for _get_block_create_0
+*/
+static inline int iitem_map_indirect_block (struct path * path, struct inode * inode,
+ long block, struct buffer_head * bh_result,
+ int args, struct cpu_key * key)
+{
+ struct buffer_head * bh = get_last_bh (path);
+ struct item_head * ih = get_ih (path);
+ __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
+ int blocknr= get_block_num(ind_item, path->pos_in_item) ;
+
+  // We do not return -ENOENT if there is a hole but the page is uptodate,
+  // because it means there is some mmapped data associated with it that
+  // is yet to be written to disk.
+  if (!blocknr && (args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page)) {
+ return -ENOENT ;
+ }
+
+ // map the found block
+ set_block_dev_mapped (bh_result, blocknr, inode);
+
+ return 0;
+}
+
+
+
+/*
+** Helper function for _get_block_create_0
+*/
+static inline void path_relse_page_unmap (struct path * path, char * p,
+ struct page * page) {
+ pathrelse(path);
+ if (p)
+ kunmap(page);
+}
+
+/*
+** Handle the indirect item case; return 1 when the item is not
+** indirect, so the caller falls through to the direct case.
+** "gbc0" stands for "get_block_create_0"
+*/
+static inline int gbc0_indirect_case (char * p, struct path * path,
+ struct inode *inode, long block,
+ struct buffer_head * bh_result,
+ int args, struct cpu_key * key)
+{
+ struct super_block * sb = inode->i_sb;
+ struct page * page = bh_result->b_page;
+ struct item_head * ih = get_ih (path);
+ int ret=0;
+
+ // requested data are in indirect item(s)
+ if (is_indirect_le_ih (ih)) {
+
+ ret = iitem_map_indirect_block (path, inode, block, bh_result, args, key);
+ if (ret<0) {
+ path_relse_page_unmap (path, p, page);
+ return ret;
+ }
+
+ if (p)
+ kunmap(page);
+
+ /*
+ ** Here we fill indirect item cache or part of it
+ ** in the inode to avoid search_by_key in case of
+ ** subsequent access to file.
+ */
+ iicache_fill (inode, block, path, key);
+ pathrelse(path);
+ return 0 ;
+ }
+
+ return 1;
+}
+
+/*
+** Direct Item case start.
+** "gbc0" stands for "get_block_create_0"
+*/
+static inline int gbc0_direct_case_start (char * p, struct path * path,
+ struct inode *inode,
+ struct buffer_head * bh_result,
+ int args)
+{
+ struct page * page = bh_result->b_page;
// requested data are in direct item(s)
if (!(args & GET_BLOCK_READ_DIRECT)) {
- // we are called by bmap. FIXME: we can not map block of file
- // when it is stored in direct item(s)
- pathrelse (&path);
- if (p)
- kunmap(bh_result->b_page) ;
- return -ENOENT;
+ // we are called by bmap. FIXME: we can not map block of file
+ // when it is stored in direct item(s)
+ path_relse_page_unmap (path, p, page);
+ return -ENOENT;
}
/* if we've got a direct item, and the buffer was uptodate,
@@ -324,89 +521,199 @@
** end, where we map the buffer and return
*/
if (buffer_uptodate(bh_result)) {
- goto finished ;
- } else
- /*
- ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
- ** pages without any buffers. If the page is up to date, we don't want
- ** read old data off disk. Set the up to date bit on the buffer instead
- ** and jump to the end
- */
- if (Page_Uptodate(bh_result->b_page)) {
- mark_buffer_uptodate(bh_result, 1);
- goto finished ;
+ set_block_dev_mapped (bh_result, 0, inode);
+ path_relse_page_unmap (path, p, page);
+ return 0;
+ } else {
+ /*
+ ** grab_tail_page can trigger calls to reiserfs_get_block on up to date
+ ** pages without any buffers. If the page is up to date, we don't want
+ ** read old data off disk. Set the up to date bit on the buffer instead
+ ** and jump to the end
+ */
+ if (Page_Uptodate(bh_result->b_page)) {
+ mark_buffer_uptodate(bh_result, 1);
+ set_block_dev_mapped (bh_result, 0, inode);
+ path_relse_page_unmap (path, p, page);
+ return 0;
+ }
}
+ return 1;
+}
- // read file tail into part of page
- offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
- fs_gen = get_generation(inode->i_sb) ;
- copy_item_head (&tmp_ih, ih);
-
- /* we only want to kmap if we are reading the tail into the page.
- ** this is not the common case, so we don't kmap until we are
- ** sure we need to. But, this means the item might move if
- ** kmap schedules
+/*
+** Handle Direct Item case.
+** "gbc0" stands for "get_block_create_0"
+*/
+static inline void gbc0_direct_case (char * p, struct path * path,
+ struct inode *inode,
+ struct cpu_key * key)
+{
+ struct buffer_head * bh;
+ struct super_block * sb = inode->i_sb;
+ struct item_head * ih = get_ih (path);
+ int chars=0, done=0;
+
+ do {
+ if (!is_direct_le_ih (ih)) {
+ BUG ();
+ }
+ /* make sure we don't read more bytes than actually exist in
+ ** the file. This can happen in odd cases where i_size isn't
+ ** correct, and when direct item padding results in a few
+ ** extra bytes at the end of the direct item
*/
- if (!p) {
- p = (char *)kmap(bh_result->b_page) ;
- if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
- goto research;
- }
+ if ((le_ih_k_offset(ih) + path->pos_in_item) > inode->i_size)
+ break ;
+
+ if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
+ chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path->pos_in_item;
+ done = 1 ;
+ } else {
+ chars = ih_item_len(ih) - path->pos_in_item;
}
- p += offset ;
- memset (p, 0, inode->i_sb->s_blocksize);
- do {
- if (!is_direct_le_ih (ih)) {
- BUG ();
- }
- /* make sure we don't read more bytes than actually exist in
- ** the file. This can happen in odd cases where i_size isn't
- ** correct, and when direct item padding results in a few
- ** extra bytes at the end of the direct item
- */
- if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size)
- break ;
- if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) {
- chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item;
- done = 1 ;
- } else {
- chars = ih_item_len(ih) - path.pos_in_item;
- }
- memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars);
- if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1))
- // we done, if read direct item is not the last item of
- // node FIXME: we could try to check right delimiting key
- // to see whether direct item continues in the right
- // neighbor or rely on i_size
- break;
+    memcpy (p, B_I_PITEM (bh, ih) + path->pos_in_item, chars);
+    p += chars;
- // update key to look for the next piece
- set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars);
- if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND)
- // we read something from tail, even if now we got IO_ERROR
- break;
- bh = get_last_bh (&path);
- ih = get_ih (&path);
- } while (1);
+ if (PATH_LAST_POSITION (path) != (B_NR_ITEMS (bh) - 1))
+      // we are done if the direct item we read is not the last item
+      // of the node. FIXME: we could try to check the right delimiting
+      // key to see whether the direct item continues in the right
+      // neighbor, or rely on i_size
+ break;
- flush_dcache_page(bh_result->b_page) ;
- kunmap(bh_result->b_page) ;
+ // update key to look for the next piece
+ set_cpu_key_k_offset (key, cpu_key_k_offset(key) + chars);
-finished:
- pathrelse (&path);
- bh_result->b_blocknr = 0 ;
- bh_result->b_dev = inode->i_dev;
- mark_buffer_uptodate (bh_result, 1);
- bh_result->b_state |= (1UL << BH_Mapped);
+ if (search_for_position_by_key (sb, key, path) != POSITION_FOUND)
+ // we read something from tail, even if now we got IO_ERROR
+ break;
+
+ bh = get_last_bh (path);
+ ih = get_ih (path);
+
+ } while (1);
+
+}
+
+
+/*
+** Helper function for _get_block_create_0
+** Check the iicache. If the needed block is in the iicache,
+** we map it and return "1".
+*/
+static int check_iicache (struct inode * inode, long block,
+ struct buffer_head * bh_result, int w_flag)
+{
+ struct super_block * sb = inode->i_sb;
+ int n=0, block_nr=0;
+
+ /*
+  ** Here we use the indirect item cache. By getting the
+  ** unformatted block number from the cache we avoid some
+  ** of the search_by_key() calls.
+ */
+ if (inode->u.reiserfs_i.iic==NULL) {
return 0;
+ }
+  // Check the iicache and get the slot number + 1 where the block
+  // number corresponding to the given logical block can be found.
+ n = block_is_iicached(inode, block);
+
+  // If the iicache is not empty for this file and the requested
+  // logical block of the file is cached,
+  // then we return the corresponding block number.
+ if (n>0) {
+ block_nr = iicache_get_blocknr_by_block(inode, block, n-1);
+
+ if (w_flag && block_nr==0) return 0; /* do not write to hole */
+
+    if (block_nr > 0) {
+ set_block_dev_mapped (bh_result, block_nr, inode);
+ return 1;
+ }
+ }
+ return 0;
}
+//
+// It is called by reiserfs_get_block when create == 0.
+// Returns the disk block number for the given logical block of the file.
+//
+// When it hits a direct item it returns 0 (when called from bmap)
+// or reads the direct item into a piece of the page (bh_result).
+//
+static int _get_block_create_0 (struct inode * inode, long block,
+ struct buffer_head * bh_result,
+ int args)
+{
+ INITIALIZE_PATH (path);
+ struct cpu_key key;
+ struct item_head * ih, tmp_ih;
+ struct super_block * sb = inode->i_sb;
+ struct page * page = bh_result->b_page;
+ char * p = NULL;
+ unsigned long offset ;
+ int fs_gen=0, ret=0, block_iicached=0;
+
+
+ block_iicached = check_iicache (inode, block, bh_result, 0);
+ if (block_iicached) {
+ return 0;
+ }
+
+  // prepare the key to look for the 'block'-th block of the file
+  make_cpu_key (&key, inode,
+                (loff_t)block * sb->s_blocksize + 1, TYPE_ANY, 3);
+
+ do {
+
+ if (search_for_position_by_key (sb, &key, &path) != POSITION_FOUND) {
+ path_relse_page_unmap (&path, p, page);
+      // We do not return -ENOENT if there is a hole but the page is uptodate,
+      // because it means there is some mmapped data associated with it
+      // that is yet to be written to disk.
+ return (((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page)) ? (-ENOENT) : 0 ) ;
+ }
+
+ // check and handle indirect case
+ ret = gbc0_indirect_case (p, &path, inode, block, bh_result, args, &key);
+ if (ret <= 0)
+ return ret;
+
+ // start the direct case
+ ret = gbc0_direct_case_start (p, &path, inode, bh_result, args);
+ if (ret <= 0)
+ return ret;
+
+    // read the file tail into part of the page.
+ offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
+ fs_gen = get_generation(sb) ;
+ ih = get_ih (&path);
+ copy_item_head (&tmp_ih, ih);
+ if (!p)
+ p=(char *)kmap(page);
+
+ } while (need_research(fs_gen, sb, &tmp_ih, &path));
+
+ // ok, we have direct item and kmapped page,
+ // do copy from direct item to page now.
+ p += offset;
+ memset (p, 0, sb->s_blocksize);
+ gbc0_direct_case (p, &path, inode, &key);
+
+ flush_dcache_page(page) ;
+ path_relse_page_unmap (&path, p, page);
+ set_block_dev_mapped (bh_result, 0, inode);
+ mark_buffer_uptodate (bh_result, 1);
+ return 0;
+}
// this is called to create file map. So, _get_block_create_0 will not
// read direct item
@@ -560,10 +867,13 @@
struct cpu_key key;
struct buffer_head * bh, * unbh = 0;
struct item_head * ih, tmp_ih;
+ struct super_block * sb = inode->i_sb;
__u32 * item;
int done;
int fs_gen;
int windex ;
+ int block_iicached=0;
+
struct reiserfs_transaction_handle th ;
/* space reserved in transaction batch:
. 3 balancings in direct->indirect conversion
@@ -590,6 +900,7 @@
return -EFBIG;
}
+
/* if !create, we aren't changing the FS, so we don't need to
** log anything, so we don't need to start a transaction
*/
@@ -601,19 +912,30 @@
unlock_kernel() ;
return ret;
}
+ /*
+  ** If the iicache has the needed disk block number and it is not
+  ** a hole, we return it from the iicache.
+ */
+ block_iicached = check_iicache (inode, block, bh_result, 1);
+ if (block_iicached) {
+ unlock_kernel() ;
+ return 0;
+ }
- /* set the key of the first byte in the 'block'-th block of file */
- make_cpu_key (&key, inode, new_offset,
- TYPE_ANY, 3/*key length*/);
+ /* set the key of the first byte
+ in the 'block'-th block of file */
+ make_cpu_key (&key, inode, new_offset, TYPE_ANY, 3);
+
if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) {
journal_begin(&th, inode->i_sb, jbegin_count) ;
reiserfs_update_inode_transaction(inode) ;
transaction_started = 1 ;
}
+
research:
retval = search_for_position_by_key (inode->i_sb, &key, &path);
@@ -683,14 +1005,20 @@
inode->i_blocks += (inode->i_sb->s_blocksize / 512) ;
reiserfs_update_sd(&th, inode) ;
}
+
+ /*
+  ** insert the new logical block and its block number
+  ** into the iicache as well
+ */
+ iicache_insert(inode, block, unfm_ptr);
+
set_block_dev_mapped(bh_result, unfm_ptr, inode);
pathrelse (&path);
pop_journal_writer(windex) ;
if (transaction_started)
journal_end(&th, inode->i_sb, jbegin_count) ;
-
unlock_kernel() ;
-
+
/* the item was found, so new blocks were not added to the file
** there is no need to make sure the inode is updated with this
** transaction
@@ -811,36 +1139,69 @@
pointer to 'block'-th block use block, which is already
allocated */
struct cpu_key tmp_key;
- struct unfm_nodeinfo un = {0, 0};
+  unp_t unf_single=0; // Used when we need to allocate only one
+                      // block, which is the fast path.
+ unp_t *un;
+ __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE;
+ __u64 blocks_needed;
RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE,
"vs-804: invalid position for append");
+
/* indirect item has to be appended, set up key of that position */
make_cpu_key (&tmp_key, inode,
le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize),
//pos_in_item * inode->i_sb->s_blocksize,
TYPE_INDIRECT, 3);// key type is unimportant
-
- if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) {
+
+ blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits);
+ RFALSE( blocks_needed < 0, "green-805: invalid offset");
+
+ if ( blocks_needed == 1 ) {
+ un = &unf_single;
+ } else {
+ un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE,
+ GFP_ATOMIC); // We need to avoid scheduling.
+ if ( !un) {
+ un = &unf_single;
+ blocks_needed = 1;
+ max_to_insert = 0;
+ }
+ memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert));
+ }
+ if ( blocks_needed <= max_to_insert) {
/* we are going to add target block to the file. Use allocated
block for that */
- un.unfm_nodenum = cpu_to_le32 (allocated_block_nr);
+ un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr);
set_block_dev_mapped (bh_result, allocated_block_nr, inode);
bh_result->b_state |= (1UL << BH_New);
done = 1;
} else {
/* paste hole to the indirect item */
+ // If kmalloc failed, max_to_insert becomes zero and it means we
+ // only have space for one block
+ blocks_needed=max_to_insert?max_to_insert:1;
}
- retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE);
+ retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed);
+
+ if (blocks_needed != 1)
+ kfree(un);
+
if (retval) {
reiserfs_free_block (&th, allocated_block_nr);
goto failure;
}
- if (un.unfm_nodenum)
+ if (done) {
inode->i_blocks += inode->i_sb->s_blocksize / 512;
+ } else {
+      // We need to record the new file size in case this function
+      // is interrupted/aborted later on. We may do this only for
+      // holes.
+ inode->i_size += inode->i_sb->s_blocksize * blocks_needed;
+ }
//mark_tail_converted (inode);
}
-
+
if (done == 1)
break;
@@ -1531,6 +1892,8 @@
/* item head of new item */
ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid;
ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th));
+
+
if (!ih.ih_key.k_objectid) {
iput(inode) ;
*err = -ENOMEM;
@@ -1757,6 +2120,7 @@
*/
void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) {
struct reiserfs_transaction_handle th ;
+ struct super_block * sb = p_s_inode->i_sb;
int windex ;
/* we want the offset for the first byte after the end of the file */
@@ -1778,6 +2142,11 @@
bh = NULL ;
}
}
+
+ /*
+ ** Truncate iicache as well
+ */
+ iicache_truncate (p_s_inode);
/* so, if page != NULL, we have a buffer head for the offset at
** the end of the file. if the bh is mapped, and bh->b_blocknr != 0,
diff -urN v2.4.19p7/fs/reiserfs/journal.c linux/fs/reiserfs/journal.c
--- v2.4.19p7/fs/reiserfs/journal.c Thu Apr 18 16:30:16 2002
+++ linux/fs/reiserfs/journal.c Wed May 1 03:54:14 2002
@@ -64,12 +64,15 @@
*/
static int reiserfs_mounted_fs_count = 0 ;
+static struct list_head kreiserfsd_supers = LIST_HEAD_INIT(kreiserfsd_supers);
+
/* wake this up when you add something to the commit thread task queue */
DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ;
/* wait on this if you need to be sure you task queue entries have been run */
static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ;
DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ;
+DECLARE_MUTEX(kreiserfsd_sem) ;
#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit
structs at 4k */
@@ -576,17 +579,12 @@
/* lock the current transaction */
inline static void lock_journal(struct super_block *p_s_sb) {
PROC_INFO_INC( p_s_sb, journal.lock_journal );
- while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) {
- PROC_INFO_INC( p_s_sb, journal.lock_journal_wait );
- sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ;
- }
- atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 1) ;
+ down(&SB_JOURNAL(p_s_sb)->j_lock);
}
@@ -1220,7 +1217,6 @@
if (run++ == 0) {
goto loop_start ;
}
-
atomic_set(&(jl->j_flushing), 0) ;
wake_up(&(jl->j_flush_wait)) ;
return ret ;
@@ -1250,7 +1246,7 @@
while(i != start) {
jl = SB_JOURNAL_LIST(s) + i ;
age = CURRENT_TIME - jl->j_timestamp ;
- if (jl->j_len > 0 && // age >= (JOURNAL_MAX_COMMIT_AGE * 2) &&
+ if (jl->j_len > 0 && age >= JOURNAL_MAX_COMMIT_AGE &&
atomic_read(&(jl->j_nonzerolen)) > 0 &&
atomic_read(&(jl->j_commit_left)) == 0) {
@@ -1325,6 +1321,10 @@
static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) {
struct reiserfs_transaction_handle myth ;
+ down(&kreiserfsd_sem);
+ list_del(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers);
+ up(&kreiserfsd_sem);
+
/* we only want to flush out transactions if we were called with error == 0
*/
if (!error && !(p_s_sb->s_flags & MS_RDONLY)) {
@@ -1811,10 +1811,6 @@
jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ;
@@ -2159,7 +2169,7 @@
reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n",
th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id);
}
- p_s_sb->s_dirt = 1 ;
+ p_s_sb->s_dirt |= S_SUPER_DIRTY;
prepared = test_and_clear_bit(BH_JPrepared, &bh->b_state) ;
/* already in this transaction, we are done */
@@ -2407,12 +2417,8 @@
** flushes any old transactions to disk
** ends the current transaction if it is too old
**
-** also calls flush_journal_list with old_only == 1, which allows me to reclaim
-** memory and such from the journal lists whose real blocks are all on disk.
-**
-** called by sync_dev_journal from buffer.c
*/
-int flush_old_commits(struct super_block *p_s_sb, int immediate) {
+int reiserfs_flush_old_commits(struct super_block *p_s_sb) {
int i ;
int count = 0;
int start ;
@@ -2429,8 +2435,7 @@
/* starting with oldest, loop until we get to the start */
i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ;
while(i != start) {
- if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > JOURNAL_MAX_COMMIT_AGE ||
- immediate)) {
+ if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > JOURNAL_MAX_COMMIT_AGE)) {
/* we have to check again to be sure the current transaction did not change */
if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) {
flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ;
@@ -2439,26 +2444,26 @@
i = (i + 1) % JOURNAL_LIST_COUNT ;
count++ ;
}
+
/* now, check the current transaction. If there are no writers, and it is too old, finish it, and
** force the commit blocks to disk
*/
- if (!immediate && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 &&
+ if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 &&
SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 &&
SB_JOURNAL(p_s_sb)->j_len > 0 &&
(now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) {
journal_join(&th, p_s_sb, 1) ;
reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
- do_journal_end(&th, p_s_sb,1, COMMIT_NOW) ;
- } else if (immediate) { /* belongs above, but I wanted this to be very explicit as a special case. If they say to
- flush, we must be sure old transactions hit the disk too. */
- journal_join(&th, p_s_sb, 1) ;
- reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ;
- journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ;
+
+ /* we're only being called from kreiserfsd, it makes no sense to do
+ ** an async commit so that kreiserfsd can do it later
+ */
do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ;
- }
- reiserfs_journal_kupdate(p_s_sb) ;
- return 0 ;
+ }
+ reiserfs_journal_kupdate(p_s_sb) ;
+
+ return S_SUPER_DIRTY_COMMIT;
}
/*
@@ -2497,7 +2502,7 @@
if (SB_JOURNAL(p_s_sb)->j_len == 0) {
int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ;
unlock_journal(p_s_sb) ;
- if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) {
+ if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) {
atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ;
wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
}
@@ -2768,6 +2773,7 @@
** it tells us if we should continue with the journal_end, or just return
*/
if (!check_journal_end(th, p_s_sb, nblocks, flags)) {
+ p_s_sb->s_dirt |= S_SUPER_DIRTY;
return 0 ;
}
@@ -2937,17 +2943,12 @@
/* write any buffers that must hit disk before this commit is done */
fsync_inode_buffers(&(SB_JOURNAL(p_s_sb)->j_dummy_inode)) ;
- /* honor the flush and async wishes from the caller */
+ /* honor the flush wishes from the caller, simple commits can
+ ** be done outside the journal lock, they are done below
+ */
if (flush) {
-
flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ;
- } else if (commit_now) {
- if (wait_on_commit) {
- flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
- } else {
- commit_flush_async(p_s_sb, orig_jindex) ;
- }
}
/* reset journal values for the next transaction */
@@ -3009,6 +3010,16 @@
atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ;
/* wake up any body waiting to join. */
wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ;
+
+ if (!flush && commit_now) {
+ if (current->need_resched)
+ schedule() ;
+ if (wait_on_commit) {
+ flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ;
+ } else {
+ commit_flush_async(p_s_sb, orig_jindex) ;
+ }
+ }
return 0 ;
}
/* start at the beginning of the objectid map (i = 0) and go to
the end of it (i = disk_sb->s_oid_cursize). Linear search is
diff -urN v2.4.19p7/fs/reiserfs/stree.c linux/fs/reiserfs/stree.c
--- v2.4.19p7/fs/reiserfs/stree.c Thu Apr 18 16:30:16 2002
+++ linux/fs/reiserfs/stree.c Wed May 1 03:54:14 2002
@@ -598,26 +598,32 @@
/* The function is NOT SCHEDULE-SAFE! */
-static void search_by_key_reada (struct super_block * s, int blocknr)
+static void search_by_key_reada (struct super_block * s,
+ struct buffer_head **bh,
+ unsigned long *b, int num)
{
- struct buffer_head * bh;
+ int i,j;
- if (blocknr == 0)
- return;
-
- bh = getblk (s->s_dev, blocknr, s->s_blocksize);
-
- if (!buffer_uptodate (bh)) {
- ll_rw_block (READA, 1, &bh);
+ for (i = 0 ; i < num ; i++) {
+ bh[i] = sb_getblk (s, b[i]);
+ if (buffer_uptodate(bh[i])) {
+ brelse(bh[i]);
+ break;
+ }
+ touch_buffer(bh[i]);
+ }
+ if (i) {
+ ll_rw_block(READA, i, bh);
+ }
+ for(j = 0 ; j < i ; j++) {
+ if (bh[j])
+ brelse(bh[j]);
}
- bh->b_count --;
}
-#endif
-
/**************************************************************************
* Algorithm SearchByKey *
* look for item in the Disk S+Tree by its key *
@@ -660,6 +666,9 @@
int n_node_level, n_retval;
int right_neighbor_of_leaf_node;
int fs_gen;
+ struct buffer_head *reada_bh[SEARCH_BY_KEY_READA];
+ unsigned long reada_blocks[SEARCH_BY_KEY_READA];
+ int reada_count = 0;
-#ifdef SEARCH_BY_KEY_READA
- /* schedule read of right neighbor */
- search_by_key_reada (p_s_sb, right_neighbor_of_leaf_node);
-#endif
-
+ /* schedule read of right neighbors */
+ if (reada_count) {
+ search_by_key_reada (p_s_sb, reada_bh, reada_blocks, reada_count);
+ reada_count = 0;
+ }
/* Read the next tree node, and set the last element in the path to
have a pointer to it. */
if ( ! (p_s_bh = p_s_last_element->pe_buffer =
@@ -785,11 +794,20 @@
position in the node. */
n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position);
-#ifdef SEARCH_BY_KEY_READA
- /* if we are going to read leaf node, then calculate its right neighbor if possible */
- if (n_node_level == DISK_LEAF_NODE_LEVEL + 1 && p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh))
- right_neighbor_of_leaf_node = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position + 1);
-#endif
+ /* if we are going to read leaf node, then try to find good leaves
+ ** for read ahead as well. Don't bother for stat data though
+ */
+ if (reiserfs_test4(p_s_sb) &&
+ n_node_level == DISK_LEAF_NODE_LEVEL + 1 &&
+ p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh) &&
+ !is_statdata_cpu_key(p_s_key))
+ {
+ int pos = p_s_last_element->pe_position;
+ int limit = B_NR_ITEMS(p_s_bh);
+ while(pos <= limit && reada_count < SEARCH_BY_KEY_READA) {
+ reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos++);
+ }
+ }
}
}
diff -urN v2.4.19p7/fs/reiserfs/super.c linux/fs/reiserfs/super.c
-//
-// a portion of this function, particularly the VFS interface portion,
-// was derived from minix or ext2's analog and evolved as the
-// prototype did. You should be able to tell which portion by looking
-// at the ext2 code and comparing. It's subfunctions contain no code
-// used as a template unless they are so labeled.
-//
+/* kreiserfsd does all the periodic stuff for us */
static void reiserfs_write_super (struct super_block * s)
{
+ s->s_dirt = S_SUPER_DIRTY_COMMIT;
+}
if (parse_options ((char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) {
return NULL;
diff -urN v2.4.19p7/fs/reiserfs/tail_conversion.c linux/fs/reiserfs/tail_conversion.c
--- v2.4.19p7/fs/reiserfs/tail_conversion.c Thu Apr 18 16:30:16 2002
+++ linux/fs/reiserfs/tail_conversion.c Wed May 1 03:54:31 2002
@@ -30,7 +30,7 @@
key of unfm pointer to be pasted */
int n_blk_size,
n_retval; /* returned value for reiserfs_insert_item and clones */
- struct unfm_nodeinfo unfm_ptr; /* Handle on an unformatted node
+ unp_t unfm_ptr; /* Handle on an unformatted node
that will be inserted in the
tree. */
diff -urN v2.4.19p7/fs/super.c linux/fs/super.c
+/* since we've added the idea of commit_dirty vs regular dirty with
+ * the commit_super operation, only use the S_SUPER_DIRTY mask if
+ * the FS has a commit_super op.
+ */
+static inline int super_dirty(struct super_block *sb)
+{
+ if (sb->s_op && sb->s_op->commit_super) {
+ return sb->s_dirt & S_SUPER_DIRTY;
+ }
+ return sb->s_dirt;
+}
+
+
static inline void write_super(struct super_block *sb)
{
lock_super(sb);
- if (sb->s_root && sb->s_dirt)
+ if (sb->s_root && super_dirty(sb))
if (sb->s_op && sb->s_op->write_super)
sb->s_op->write_super(sb);
unlock_super(sb);
}
+static inline void commit_super(struct super_block *sb)
+{
+ lock_super(sb);
+ if (sb->s_root && sb->s_dirt) {
+ if (sb->s_op && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ if (sb->s_op && sb->s_op->commit_super)
+ sb->s_op->commit_super(sb);
+ }
+ unlock_super(sb);
+}
+
+void commit_supers(kdev_t dev)
+{
+ struct super_block * sb;
+
+ if (dev) {
+ sb = get_super(dev);
+ if (sb) {
+ if (sb->s_dirt)
+ commit_super(sb);
+ drop_super(sb);
+ }
+ }
+restart:
+ spin_lock(&sb_lock);
+ sb = sb_entry(super_blocks.next);
+ while (sb != sb_entry(&super_blocks))
+ if (sb->s_dirt) {
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+ down_read(&sb->s_umount);
+ commit_super(sb);
+ drop_super(sb);
+ goto restart;
+ } else
+ sb = sb_entry(sb->s_list.next);
+ spin_unlock(&sb_lock);
+}
+
/*
* Note: check the dirty flag before waiting, so we don't
* hold up the sync while mounting a device. (The newly
@@ -462,7 +515,7 @@
spin_lock(&sb_lock);
sb = sb_entry(super_blocks.next);
while (sb != sb_entry(&super_blocks))
- if (sb->s_dirt) {
+ if (super_dirty(sb)) {
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
diff -urN v2.4.19p7/include/linux/fs.h linux/include/linux/fs.h
--- v2.4.19p7/include/linux/fs.h Thu Apr 18 16:30:26 2002
+++ linux/include/linux/fs.h Wed May 1 03:54:14 2002
@@ -706,6 +706,10 @@
#define sb_entry(list) list_entry((list), struct super_block, s_list)
#define S_BIAS (1<<30)
+
+/* flags for the s_dirt field */
+#define S_SUPER_DIRTY 1
+#define S_SUPER_DIRTY_COMMIT 2
struct super_block {
struct list_head s_list; /* Keep this first */
kdev_t s_dev;
@@ -918,6 +922,7 @@
struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent);
int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent);
int (*show_options)(struct seq_file *, struct vfsmount *);
+ void (*commit_super) (struct super_block *);
};
/* Inode state bits.. */
@@ -1226,6 +1231,7 @@
extern int filemap_fdatasync(struct address_space *);
extern int filemap_fdatawait(struct address_space *);
extern void sync_supers(kdev_t);
+extern void commit_supers(kdev_t);
extern int bmap(struct inode *, int);
extern int notify_change(struct dentry *, struct iattr *);
extern int permission(struct inode *, int);
diff -urN v2.4.19p7/include/linux/reiserfs_fs.h linux/include/linux/reiserfs_fs.h
--- v2.4.19p7/include/linux/reiserfs_fs.h Thu Apr 18 16:30:28 2002
+++ linux/include/linux/reiserfs_fs.h Wed May 1 04:08:30 2002
@@ -55,7 +55,7 @@
#define USE_INODE_GENERATION_COUNTER
#include <linux/list.h>
+
+// The cache for indirect items (iicache).
+struct iicache {
+  long i_cache_blocknr; /* first block number of a contiguous run */
+  long i_cache_size ;   /* number of blocks in the contiguous run */
+  long i_cache_block ;  /* first cached logical block of the file */
+};
/** bitmasks for i_flags field in reiserfs-specific part of inode */
typedef enum {
@@ -46,6 +53,10 @@
** flushed */
unsigned long i_trans_id ;
unsigned long i_trans_index ;
+
+ // The cache for indirect item (iicache).
+ struct iicache * iic;
+ int iic_asize; /* iicache array size */
};
#endif
diff -urN v2.4.19p7/include/linux/reiserfs_fs_sb.h linux/include/linux/reiserfs_fs_sb.h
--- v2.4.19p7/include/linux/reiserfs_fs_sb.h Thu Apr 18 16:30:28 2002
+++ linux/include/linux/reiserfs_fs_sb.h Wed May 1 04:10:15 2002
@@ -291,8 +291,7 @@
*/
struct reiserfs_page_list *j_flush_pages ;
time_t j_trans_start_time ; /* time this transaction started */
- wait_queue_head_t j_wait ; /* wait journal_end to finish I/O */
- atomic_t j_wlock ; /* lock for j_wait */
+ struct semaphore j_lock ;
wait_queue_head_t j_join_wait ; /* wait for current transaction to finish before starting new one */
atomic_t j_jlock ; /* lock for j_join_wait */
int j_journal_list_index ; /* journal list number of the current trans */
@@ -444,6 +443,7 @@
int s_is_unlinked_ok;
reiserfs_proc_info_data_t s_proc_info_data;
struct proc_dir_entry *procdir;
+ struct list_head s_reiserfs_supers;
};