--- drivers/acorn/block/fd1772.c.~1~ Thu Jan 7 16:46:58 1999
+++ drivers/acorn/block/fd1772.c Wed May 12 16:35:47 1999
@@ -1569,7 +1569,7 @@
if (!filp || (filp->f_mode & (2 | OPEN_WRITE_BIT)))
/* if the file is mounted OR (writable now AND writable at open
time) Linus: Does this cover all cases? */
- block_fsync(inode, filp);
+			block_fsync(filp, filp->f_dentry, 0);
if (fd_ref[drive] < 0)
fd_ref[drive] = 0;
--- drivers/block/ataflop.c.~1~ Tue Dec 29 19:23:59 1998
+++ drivers/block/ataflop.c Wed May 12 16:35:47 1999
@@ -1986,7 +1986,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync (filp, filp->f_dentry);
+ block_fsync (filp, filp->f_dentry, 0);
if (fd_ref[drive] < 0)
fd_ref[drive] = 0;
--- drivers/block/floppy.c.~1~ Tue Apr 20 22:33:21 1999
+++ drivers/block/floppy.c Wed May 12 16:35:47 1999
@@ -3677,7 +3677,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync(filp, filp->f_dentry);
+ block_fsync(filp, filp->f_dentry, 0);
if (UDRS->fd_ref < 0)
UDRS->fd_ref=0;
--- drivers/block/swim3.c.~1~ Tue Apr 20 22:33:02 1999
+++ drivers/block/swim3.c Wed May 12 16:35:47 1999
@@ -941,7 +941,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync (filp, filp->f_dentry);
+ block_fsync (filp, filp->f_dentry, 0);
fs = &floppy_states[devnum];
sw = fs->swim3;
--- fs/block_dev.c.~1~ Fri Nov 13 18:07:26 1998
+++ fs/block_dev.c Wed May 12 16:35:47 1999
@@ -292,7 +292,7 @@
* since the vma has no handle.
*/
-int block_fsync(struct file *filp, struct dentry *dentry)
+int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
return fsync_dev(dentry->d_inode->i_rdev);
}
--- fs/buffer.c.~1~ Wed May 12 15:54:22 1999
+++ fs/buffer.c Wed May 12 16:35:48 1999
@@ -55,6 +55,8 @@
#define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
number of unused buffer heads */
+#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers)
+
/*
* Hash table mask..
*/
@@ -302,7 +304,7 @@
* filp may be NULL if called via the msync of a vma.
*/
- /* this needs further work, at the moment it is identical to fsync() */
down(&inode->i_sem);
- err = file->f_op->fsync(file, dentry);
+ err = file->f_op->fsync(file, dentry, 1);
up(&inode->i_sem);
out_putf:
@@ -548,6 +549,122 @@
}
}
+void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode)
+{
+ if (bh->b_inode)
+ list_del(&bh->b_inode_buffers);
+ bh->b_inode = inode;
+ list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers);
+}
+
+static void __remove_inode_queue(struct buffer_head *bh)
+{
+ bh->b_inode = NULL;
+ list_del(&bh->b_inode_buffers);
+}
+
+static inline void remove_inode_queue(struct buffer_head *bh)
+{
+ if (bh->b_inode)
+ __remove_inode_queue(bh);
+}
+
+
+/*
+ * Synchronise all the inode's dirty buffers to the disk.
+ *
+ * We have conflicting pressures: we want to make sure that all
+ * initially dirty buffers get waited on, but that any subsequently
+ * dirtied buffers don't. After all, we don't want fsync to last
+ * forever if somebody is actively writing to the file.
+ *
+ * Do this in two main stages: first we copy dirty buffers to a
+ * temporary inode list, queueing the writes as we go. Then we clean
+ * up, waiting for those writes to complete.
+ *
+ * During this second stage, any subsequent updates to the file may end
+ * up refiling the buffer on the original inode's dirty list again, so
+ * there is a chance we will end up with a buffer queued for write but
+ * not yet completed on that list. So, as a final cleanup we go through
+ * the osync code to catch these locked, dirty buffers without requeuing
+ * any newly dirty buffers for write.
+ */
+
+int fsync_inode_buffers(struct inode *inode)
+{
+ struct buffer_head *bh;
+ struct inode tmp;
+ int err = 0, err2;
+
+ INIT_LIST_HEAD(&tmp.i_dirty_buffers);
+
+ while (!list_empty(&inode->i_dirty_buffers)) {
+ bh = BH_ENTRY(inode->i_dirty_buffers.next);
+ list_del(&bh->b_inode_buffers);
+ if (!buffer_dirty(bh) && !buffer_locked(bh))
+ bh->b_inode = NULL;
+ else {
+ bh->b_inode = &tmp;
+ list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers);
+ bh->b_count++;
+ if (buffer_dirty(bh))
+ ll_rw_block(WRITE, 1, &bh);
+ }
+ }
+
+ while (!list_empty(&tmp.i_dirty_buffers)) {
+ bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
+ remove_inode_queue(bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ brelse(bh);
+ }
+
+ err2 = osync_inode_buffers(inode);
+
+ if (err)
+ return err;
+ else
+ return err2;
+}
+
+
+/*
+ * osync is designed to support O_SYNC io. It waits synchronously for
+ * all already-submitted IO to complete, but does not queue any new
+ * writes to the disk.
+ *
+ * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
+ * you dirty the buffers, and then use osync_inode_buffers to wait for
+ * completion. Any other dirty buffers which are not yet queued for
+ * write will not be flushed to disk by the osync.
+ */
+
+int osync_inode_buffers(struct inode *inode)
+{
+ struct buffer_head *bh;
+ struct list_head *list;
+ int err = 0;
+
+ repeat:
+
+ for (list = inode->i_dirty_buffers.prev;
+ bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
+ list = bh->b_inode_buffers.prev) {
+ if (buffer_locked(bh)) {
+ bh->b_count++;
+ wait_on_buffer(bh);
+ brelse(bh);
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ goto repeat;
+ }
+ }
+ return err;
+}
+
+
struct buffer_head * find_buffer(kdev_t dev, int block, int size)
{
struct buffer_head * next;
@@ -558,7 +675,7 @@
if (!next)
break;
next = tmp->b_next;
- if (tmp->b_blocknr != block || tmp->b_size != size || tmp->b_dev != dev)
+ if (tmp->b_blocknr != block || tmp->b_dev != dev || tmp->b_size != size)
continue;
next = tmp;
break;
@@ -571,8 +688,7 @@
* As we don't lock buffers (unless we are reading them, that is),
* something might happen to it while we sleep (ie a read-error
* will force it bad). This shouldn't really happen currently, but
- * the code is ready.
- */
+ * the code is ready. */
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
struct buffer_head * bh;
@@ -777,13 +893,14 @@
dispose = BUF_CLEAN;
if(dispose != buf->b_list) {
file_buffer(buf, dispose);
- if(dispose == BUF_DIRTY) {
+ switch (dispose) {
+ case BUF_DIRTY: {
int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
/* This buffer is dirty, maybe we need to start flushing.
* If too high a percentage of the buffers are dirty...
*/
- if (nr_buffers_type[BUF_DIRTY] > too_many)
+ if (nr_buffers_type[BUF_DIRTY] > too_many)
wakeup_bdflush(1);
/* If this is a loop device, and
@@ -793,6 +910,11 @@
if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
wakeup_bdflush(1);
+ break;
+ }
+ case BUF_CLEAN:
+ remove_inode_queue(buf);
+		default: break;
}
}
}
@@ -822,6 +944,7 @@
*/
void __bforget(struct buffer_head * buf)
{
+ remove_inode_queue(buf);
if (buf->b_count != 1 || buffer_locked(buf)) {
__brelse(buf);
return;
@@ -1437,6 +1560,7 @@
tmp = tmp->b_this_page;
nr_buffers--;
remove_from_queues(p);
+ remove_inode_queue(p);
put_unused_buffer_head(p);
} while (tmp != bh);
- result = file_fsync(&cont_file ,&cont_dentry);
+ result = file_fsync(&cont_file ,&cont_dentry, 0);
if ( result == 0 ) {
result = venus_fsync(coda_inode->i_sb, &(cnp->c_fid));
}
--- fs/ext2/balloc.c.~1~ Thu Oct 29 05:54:56 1998
+++ fs/ext2/balloc.c Wed May 12 19:31:55 1999
@@ -74,42 +74,35 @@
}
/*
- * Read the bitmap for a given block_group, reading into the specified
- * slot in the superblock's bitmap cache.
+ * Read the bitmap for a given block_group.
*
- * Return >=0 on success or a -ve error code.
+ * Return the buffer_head on success or NULL on IO error.
*/
-static int read_block_bitmap (struct super_block * sb,
- unsigned int block_group,
- unsigned long bitmap_nr)
+static struct buffer_head * read_block_bitmap (struct super_block * sb,
+ unsigned int block_group)
{
struct ext2_group_desc * gdp;
struct buffer_head * bh = NULL;
- int retval = -EIO;
gdp = ext2_get_group_desc (sb, block_group, NULL);
if (!gdp)
- goto error_out;
- retval = 0;
+ return NULL;
+
+ unlock_super(sb);
bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_block_bitmap), sb->s_blocksize);
+ lock_super(sb);
if (!bh) {
ext2_error (sb, "read_block_bitmap",
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %lu",
block_group, (unsigned long) gdp->bg_block_bitmap);
- retval = -EIO;
+ return NULL;
}
- /*
- * On IO error, just leave a zero in the superblock's block pointer for
- * this group. The IO will be retried next time.
- */
-error_out:
- sb->u.ext2_sb.s_block_bitmap_number[bitmap_nr] = block_group;
- sb->u.ext2_sb.s_block_bitmap[bitmap_nr] = bh;
- return retval;
+ return bh;
}
+
/*
* load_block_bitmap loads the block bitmap for a blocks group
*
@@ -126,9 +119,9 @@
static int load__block_bitmap (struct super_block * sb,
unsigned int block_group)
{
- int i, j, retval = 0;
+ int i, j;
unsigned long block_bitmap_number;
- struct buffer_head * block_bitmap;
+ struct buffer_head * block_bitmap, *cached_bitmap = NULL;
-static int sync_dindirect (struct inode * inode, u32 * diblock, int wait)
-{
- int i;
- struct buffer_head * dind_bh;
- int rc, err = 0;
-
- rc = sync_iblock (inode, diblock, &dind_bh, wait);
- if (rc || !dind_bh)
- return rc;
-
- for (i = 0; i < addr_per_block; i++) {
- rc = sync_indirect_swab32 (inode,
- ((u32 *) dind_bh->b_data) + i,
- wait);
- if (rc)
- err = rc;
- }
- brelse (dind_bh);
- return err;
-}
+/*
+ * Unlike the fsync functions, ext2_osync_inode only waits for
+ * completion of already-scheduled writes on this inode. It does not
+ * write out any new dirty data.
+ */
-#ifndef __LITTLE_ENDIAN
-static __inline__ int sync_dindirect_swab32 (struct inode * inode, u32 * diblock, int wait)
+int ext2_osync_inode(struct inode *inode, int datasync)
{
- int i;
- struct buffer_head * dind_bh;
- int rc, err = 0;
-
- rc = sync_iblock_swab32 (inode, diblock, &dind_bh, wait);
- if (rc || !dind_bh)
- return rc;
+ int err;
- for (i = 0; i < addr_per_block; i++) {
- rc = sync_indirect_swab32 (inode,
- ((u32 *) dind_bh->b_data) + i,
- wait);
- if (rc)
- err = rc;
- }
- brelse (dind_bh);
- return err;
-}
-#else
-#define sync_dindirect_swab32 sync_dindirect
-#endif
-
-static int sync_tindirect (struct inode * inode, u32 * tiblock, int wait)
-{
- int i;
- struct buffer_head * tind_bh;
- int rc, err = 0;
-
- rc = sync_iblock (inode, tiblock, &tind_bh, wait);
- if (rc || !tind_bh)
- return rc;
+ err = osync_inode_buffers(inode);
+ if (!(inode->i_state & I_DIRTY))
+ return err;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ return err;
- for (i = 0; i < addr_per_block; i++) {
- rc = sync_dindirect_swab32 (inode,
- ((u32 *) tind_bh->b_data) + i,
- wait);
- if (rc)
- err = rc;
- }
- brelse (tind_bh);
- return err;
-}
-
-/*
- * File may be NULL when we are called. Perhaps we shouldn't
- * even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file * file, struct dentry *dentry)
-{
- int wait, err = 0;
- struct inode *inode = dentry->d_inode;
-
- if (S_ISLNK(inode->i_mode) && !(inode->i_blocks))
- /*
- * Don't sync fast links!
- */
- goto skip;
-
- for (wait=0; wait<=1; wait++)
- {
- err |= sync_direct (inode, wait);
- err |= sync_indirect (inode,
- inode->u.ext2_i.i_data+EXT2_IND_BLOCK,
- wait);
- err |= sync_dindirect (inode,
- inode->u.ext2_i.i_data+EXT2_DIND_BLOCK,
- wait);
- err |= sync_tindirect (inode,
- inode->u.ext2_i.i_data+EXT2_TIND_BLOCK,
- wait);
- }
-skip:
- err |= ext2_sync_inode (inode);
+ err |= ext2_sync_inode(inode);
return err ? -EIO : 0;
}
+
--- fs/ext2/ialloc.c.~1~ Tue Oct 20 22:08:14 1998
+++ fs/ext2/ialloc.c Wed May 12 19:33:06 1999
@@ -39,42 +39,35 @@
#include <asm/byteorder.h>
/*
- * Read the inode allocation bitmap for a given block_group, reading
- * into the specified slot in the superblock's bitmap cache.
+ * Read the bitmap for a given block_group.
*
- * Return >=0 on success or a -ve error code.
+ * Return the buffer_head on success or NULL on IO error.
*/
-static int read_inode_bitmap (struct super_block * sb,
- unsigned long block_group,
- unsigned int bitmap_nr)
+
+static struct buffer_head * read_inode_bitmap (struct super_block * sb,
+ unsigned long block_group)
{
struct ext2_group_desc * gdp;
struct buffer_head * bh = NULL;
- int retval = 0;
gdp = ext2_get_group_desc (sb, block_group, NULL);
- if (!gdp) {
- retval = -EIO;
- goto error_out;
- }
+ if (!gdp)
+ return NULL;
+
+ unlock_super(sb);
bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_inode_bitmap), sb->s_blocksize);
+ lock_super(sb);
if (!bh) {
ext2_error (sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %lu",
block_group, (unsigned long) gdp->bg_inode_bitmap);
- retval = -EIO;
+ return NULL;
}
- /*
- * On IO error, just leave a zero in the superblock's block pointer for
- * this group. The IO will be retried next time.
- */
-error_out:
- sb->u.ext2_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
- sb->u.ext2_sb.s_inode_bitmap[bitmap_nr] = bh;
- return retval;
+ return bh;
}
+
/*
* load_inode_bitmap loads the inode bitmap for a blocks group
*
@@ -91,9 +84,9 @@
static int load_inode_bitmap (struct super_block * sb,
unsigned int block_group)
{
- int i, j, retval = 0;
+ int i, j;
unsigned long inode_bitmap_number;
- struct buffer_head * inode_bitmap;
+ struct buffer_head * inode_bitmap, * cached_bitmap = NULL;
/* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *);
+extern int ext2_fsync_file (struct file *, struct dentry *, int);
+extern int ext2_fsync_inode (struct inode *, int);
+extern int ext2_osync_inode (struct inode *, int);
/* ialloc.c */
extern struct inode * ext2_new_inode (const struct inode *, int, int *);
--- include/linux/fs.h.~1~ Wed May 12 16:04:53 1999
+++ include/linux/fs.h Wed May 12 17:09:10 1999
@@ -184,38 +184,35 @@
#define BH_Protected 6 /* 1 if the buffer is protected */
/*
- * Try to keep the most commonly used fields in single cache lines (16
+ * Try to keep the most commonly used fields in single cache lines (32
* bytes) to improve performance. This ordering should be
- * particularly beneficial on 32-bit processors.
- *
- * We use the first 16 bytes for the data which is used in searches
- * over the block hash lists (ie. getblk(), find_buffer() and
- * friends).
- *
- * The second 16 bytes we use for lru buffer scans, as used by
- * sync_buffers() and refill_freelist(). -- sct
+ * particularly beneficial on 32-bit processors. -- sct
*/
struct buffer_head {
/* First cache line: */
struct buffer_head * b_next; /* Hash queue list */
unsigned long b_blocknr; /* block number */
- unsigned long b_size; /* block size */
kdev_t b_dev; /* device (B_FREE = free) */
kdev_t b_rdev; /* Real device */
- unsigned long b_rsector; /* Real buffer location on disk */
struct buffer_head * b_this_page; /* circular list of buffers in one page */
unsigned long b_state; /* buffer state bitmap (see above) */
struct buffer_head * b_next_free;
unsigned int b_count; /* users using this block */
+ unsigned int b_list; /* List that this buffer appears */
/* Non-performance-critical data follows. */
+ unsigned long b_size; /* block size */
+ unsigned long b_rsector; /* Real buffer location on disk */
char * b_data; /* pointer to data block (1024 bytes) */
- unsigned int b_list; /* List that this buffer appears */
unsigned long b_flushtime; /* Time when this (dirty) buffer
* should be written */
struct wait_queue * b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
struct buffer_head * b_prev_free; /* doubly linked list of buffers */
+
+ struct inode * b_inode;
+ struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */
+
struct buffer_head * b_reqnext; /* request queue */