--- drivers/acorn/block/fd1772.c.~1~ Thu Jan 7 16:46:58 1999
+++ drivers/acorn/block/fd1772.c Wed May 12 16:35:47 1999
@@ -1569,7 +1569,7 @@
if (!filp || (filp->f_mode & (2 | OPEN_WRITE_BIT)))
/* if the file is mounted OR (writable now AND writable at open
time) Linus: Does this cover all cases? */
- block_fsync(inode, filp);
+			block_fsync(filp, filp->f_dentry, 0);
if (fd_ref[drive] < 0)
fd_ref[drive] = 0;
--- drivers/block/ataflop.c.~1~ Tue Dec 29 19:23:59 1998
+++ drivers/block/ataflop.c Wed May 12 16:35:47 1999
@@ -1986,7 +1986,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync (filp, filp->f_dentry);
+ block_fsync (filp, filp->f_dentry, 0);
if (fd_ref[drive] < 0)
fd_ref[drive] = 0;
--- drivers/block/floppy.c.~1~ Tue Apr 20 22:33:21 1999
+++ drivers/block/floppy.c Wed May 12 16:35:47 1999
@@ -3677,7 +3677,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync(filp, filp->f_dentry);
+ block_fsync(filp, filp->f_dentry, 0);
if (UDRS->fd_ref < 0)
UDRS->fd_ref=0;
--- drivers/block/swim3.c.~1~ Tue Apr 20 22:33:02 1999
+++ drivers/block/swim3.c Wed May 12 16:35:47 1999
@@ -941,7 +941,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync (filp, filp->f_dentry);
+ block_fsync (filp, filp->f_dentry, 0);
fs = &floppy_states[devnum];
sw = fs->swim3;
--- fs/block_dev.c.~1~ Fri Nov 13 18:07:26 1998
+++ fs/block_dev.c Wed May 12 16:35:47 1999
@@ -292,7 +292,7 @@
* since the vma has no handle.
*/
-int block_fsync(struct file *filp, struct dentry *dentry)
+int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
return fsync_dev(dentry->d_inode->i_rdev);
}
--- fs/buffer.c.~1~ Wed May 12 15:54:22 1999
+++ fs/buffer.c Wed May 12 16:35:48 1999
@@ -55,6 +55,8 @@
#define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
number of unused buffer heads */
+#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers)
+
/*
* Hash table mask..
*/
@@ -302,7 +304,7 @@
* filp may be NULL if called via the msync of a vma.
*/
- /* this needs further work, at the moment it is identical to fsync() */
down(&inode->i_sem);
- err = file->f_op->fsync(file, dentry);
+ err = file->f_op->fsync(file, dentry, 1);
up(&inode->i_sem);
out_putf:
@@ -548,6 +549,122 @@
}
}
+void buffer_insert_inode_queue(struct buffer_head *bh, struct inode *inode)
+{
+ if (bh->b_inode)
+ list_del(&bh->b_inode_buffers);
+ bh->b_inode = inode;
+ list_add(&bh->b_inode_buffers, &inode->i_dirty_buffers);
+}
+
+static void __remove_inode_queue(struct buffer_head *bh)
+{
+ bh->b_inode = NULL;
+ list_del(&bh->b_inode_buffers);
+}
+
+static inline void remove_inode_queue(struct buffer_head *bh)
+{
+ if (bh->b_inode)
+ __remove_inode_queue(bh);
+}
+
+
+/*
+ * Synchronise all the inode's dirty buffers to the disk.
+ *
+ * We have conflicting pressures: we want to make sure that all
+ * initially dirty buffers get waited on, but that any subsequently
+ * dirtied buffers don't. After all, we don't want fsync to last
+ * forever if somebody is actively writing to the file.
+ *
+ * Do this in two main stages: first we copy dirty buffers to a
+ * temporary inode list, queueing the writes as we go. Then we clean
+ * up, waiting for those writes to complete.
+ *
+ * During this second stage, any subsequent updates to the file may end
+ * up refiling the buffer on the original inode's dirty list again, so
+ * there is a chance we will end up with a buffer queued for write but
+ * not yet completed on that list. So, as a final cleanup we go through
+ * the osync code to catch these locked, dirty buffers without requeuing
+ * any newly dirty buffers for write.
+ */
+
+int fsync_inode_buffers(struct inode *inode)
+{
+ struct buffer_head *bh;
+ struct inode tmp;
+ int err = 0, err2;
+
+ INIT_LIST_HEAD(&tmp.i_dirty_buffers);
+
+ while (!list_empty(&inode->i_dirty_buffers)) {
+ bh = BH_ENTRY(inode->i_dirty_buffers.next);
+ list_del(&bh->b_inode_buffers);
+ if (!buffer_dirty(bh) && !buffer_locked(bh))
+ bh->b_inode = NULL;
+ else {
+ bh->b_inode = &tmp;
+ list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers);
+ bh->b_count++;
+ if (buffer_dirty(bh))
+ ll_rw_block(WRITE, 1, &bh);
+ }
+ }
+
+ while (!list_empty(&tmp.i_dirty_buffers)) {
+ bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
+ remove_inode_queue(bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ brelse(bh);
+ }
+
+ err2 = osync_inode_buffers(inode);
+
+ if (err)
+ return err;
+ else
+ return err2;
+}
+
+
+/*
+ * osync is designed to support O_SYNC io. It waits synchronously for
+ * all already-submitted IO to complete, but does not queue any new
+ * writes to the disk.
+ *
+ * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
+ * you dirty the buffers, and then use osync_inode_buffers to wait for
+ * completion. Any other dirty buffers which are not yet queued for
+ * write will not be flushed to disk by the osync.
+ */
+
+int osync_inode_buffers(struct inode *inode)
+{
+ struct buffer_head *bh;
+ struct list_head *list;
+ int err = 0;
+
+ repeat:
+
+ for (list = inode->i_dirty_buffers.prev;
+ bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
+ list = bh->b_inode_buffers.prev) {
+ if (buffer_locked(bh)) {
+ bh->b_count++;
+ wait_on_buffer(bh);
+ brelse(bh);
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ goto repeat;
+ }
+ }
+ return err;
+}
+
+
struct buffer_head * find_buffer(kdev_t dev, int block, int size)
{
struct buffer_head * next;
@@ -558,7 +675,7 @@
if (!next)
break;
next = tmp->b_next;
- if (tmp->b_blocknr != block || tmp->b_size != size || tmp->b_dev != dev)
+ if (tmp->b_blocknr != block || tmp->b_dev != dev || tmp->b_size != size)
continue;
next = tmp;
break;
@@ -571,8 +688,7 @@
* As we don't lock buffers (unless we are reading them, that is),
* something might happen to it while we sleep (ie a read-error
* will force it bad). This shouldn't really happen currently, but
- * the code is ready.
- */
+ * the code is ready. */
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
struct buffer_head * bh;
@@ -777,13 +893,14 @@
dispose = BUF_CLEAN;
if(dispose != buf->b_list) {
file_buffer(buf, dispose);
- if(dispose == BUF_DIRTY) {
+ switch (dispose) {
+ case BUF_DIRTY: {
int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
/* This buffer is dirty, maybe we need to start flushing.
* If too high a percentage of the buffers are dirty...
*/
- if (nr_buffers_type[BUF_DIRTY] > too_many)
+ if (nr_buffers_type[BUF_DIRTY] > too_many)
wakeup_bdflush(1);
/* If this is a loop device, and
@@ -793,6 +910,11 @@
if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
wakeup_bdflush(1);
+ break;
+ }
+ case BUF_CLEAN:
+ remove_inode_queue(buf);
+		default: break;
}
}
}
@@ -822,6 +944,7 @@
*/
void __bforget(struct buffer_head * buf)
{
+ remove_inode_queue(buf);
if (buf->b_count != 1 || buffer_locked(buf)) {
__brelse(buf);
return;
@@ -1437,6 +1560,7 @@
tmp = tmp->b_this_page;
nr_buffers--;
remove_from_queues(p);
+ remove_inode_queue(p);
put_unused_buffer_head(p);
} while (tmp != bh);
- result = file_fsync(&cont_file ,&cont_dentry);
+ result = file_fsync(&cont_file ,&cont_dentry, 0);
if ( result == 0 ) {
result = venus_fsync(coda_inode->i_sb, &(cnp->c_fid));
}
--- fs/ext2/balloc.c.~1~ Thu Oct 29 05:54:56 1998
+++ fs/ext2/balloc.c Wed May 12 19:31:55 1999
@@ -74,42 +74,35 @@
}
/*
- * Read the bitmap for a given block_group, reading into the specified
- * slot in the superblock's bitmap cache.
+ * Read the bitmap for a given block_group.
*
- * Return >=0 on success or a -ve error code.
+ * Return the buffer_head on success or NULL on IO error.
*/
-static int read_block_bitmap (struct super_block * sb,
- unsigned int block_group,
- unsigned long bitmap_nr)
+static struct buffer_head * read_block_bitmap (struct super_block * sb,
+ unsigned int block_group)
{
struct ext2_group_desc * gdp;
struct buffer_head * bh = NULL;
- int retval = -EIO;
gdp = ext2_get_group_desc (sb, block_group, NULL);
if (!gdp)
- goto error_out;
- retval = 0;
+ return NULL;
+
+ unlock_super(sb);
bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_block_bitmap), sb->s_blocksize);
+ lock_super(sb);
if (!bh) {
ext2_error (sb, "read_block_bitmap",
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %lu",
block_group, (unsigned long) gdp->bg_block_bitmap);
- retval = -EIO;
+ return NULL;
}
- /*
- * On IO error, just leave a zero in the superblock's block pointer for
- * this group. The IO will be retried next time.
- */
-error_out:
- sb->u.ext2_sb.s_block_bitmap_number[bitmap_nr] = block_group;
- sb->u.ext2_sb.s_block_bitmap[bitmap_nr] = bh;
- return retval;
+ return bh;
}
+
/*
* load_block_bitmap loads the block bitmap for a blocks group
*
@@ -126,9 +119,9 @@
static int load__block_bitmap (struct super_block * sb,
unsigned int block_group)
{
- int i, j, retval = 0;
+ int i, j;
unsigned long block_bitmap_number;
- struct buffer_head * block_bitmap;
+ struct buffer_head * block_bitmap, *cached_bitmap = NULL;
-static int sync_dindirect (struct inode * inode, u32 * diblock, int wait)
-{
- int i;
- struct buffer_head * dind_bh;
- int rc, err = 0;
-
- rc = sync_iblock (inode, diblock, &dind_bh, wait);
- if (rc || !dind_bh)
- return rc;
-
- for (i = 0; i < addr_per_block; i++) {
- rc = sync_indirect_swab32 (inode,
- ((u32 *) dind_bh->b_data) + i,
- wait);
- if (rc)
- err = rc;
- }
- brelse (dind_bh);
- return err;
-}
+/*
+ * Unlike the fsync functions, ext2_osync_inode only waits for
+ * completion of already-scheduled writes on this inode. It does not
+ * write out any new dirty data.
+ */
-#ifndef __LITTLE_ENDIAN
-static __inline__ int sync_dindirect_swab32 (struct inode * inode, u32 * diblock, int wait)
+int ext2_osync_inode(struct inode *inode, int datasync)
{
- int i;
- struct buffer_head * dind_bh;
- int rc, err = 0;
-
- rc = sync_iblock_swab32 (inode, diblock, &dind_bh, wait);
- if (rc || !dind_bh)
- return rc;
+ int err;
- for (i = 0; i < addr_per_block; i++) {
- rc = sync_indirect_swab32 (inode,
- ((u32 *) dind_bh->b_data) + i,
- wait);
- if (rc)
- err = rc;
- }
- brelse (dind_bh);
- return err;
-}
-#else
-#define sync_dindirect_swab32 sync_dindirect
-#endif
-
-static int sync_tindirect (struct inode * inode, u32 * tiblock, int wait)
-{
- int i;
- struct buffer_head * tind_bh;
- int rc, err = 0;
-
- rc = sync_iblock (inode, tiblock, &tind_bh, wait);
- if (rc || !tind_bh)
- return rc;
+ err = osync_inode_buffers(inode);
+ if (!(inode->i_state & I_DIRTY))
+ return err;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ return err;
- for (i = 0; i < addr_per_block; i++) {
- rc = sync_dindirect_swab32 (inode,
- ((u32 *) tind_bh->b_data) + i,
- wait);
- if (rc)
- err = rc;
- }
- brelse (tind_bh);
- return err;
-}
-
-/*
- * File may be NULL when we are called. Perhaps we shouldn't
- * even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file * file, struct dentry *dentry)
-{
- int wait, err = 0;
- struct inode *inode = dentry->d_inode;
-
- if (S_ISLNK(inode->i_mode) && !(inode->i_blocks))
- /*
- * Don't sync fast links!
- */
- goto skip;
-
- for (wait=0; wait<=1; wait++)
- {
- err |= sync_direct (inode, wait);
- err |= sync_indirect (inode,
- inode->u.ext2_i.i_data+EXT2_IND_BLOCK,
- wait);
- err |= sync_dindirect (inode,
- inode->u.ext2_i.i_data+EXT2_DIND_BLOCK,
- wait);
- err |= sync_tindirect (inode,
- inode->u.ext2_i.i_data+EXT2_TIND_BLOCK,
- wait);
- }
-skip:
- err |= ext2_sync_inode (inode);
+ err |= ext2_sync_inode(inode);
return err ? -EIO : 0;
}
+
--- fs/ext2/ialloc.c.~1~ Tue Oct 20 22:08:14 1998
+++ fs/ext2/ialloc.c Wed May 12 19:33:06 1999
@@ -39,42 +39,35 @@
#include <asm/byteorder.h>
/*
- * Read the inode allocation bitmap for a given block_group, reading
- * into the specified slot in the superblock's bitmap cache.
+ * Read the bitmap for a given block_group.
*
- * Return >=0 on success or a -ve error code.
+ * Return the buffer_head on success or NULL on IO error.
*/
-static int read_inode_bitmap (struct super_block * sb,
- unsigned long block_group,
- unsigned int bitmap_nr)
+
+static struct buffer_head * read_inode_bitmap (struct super_block * sb,
+ unsigned long block_group)
{
struct ext2_group_desc * gdp;
struct buffer_head * bh = NULL;
- int retval = 0;
gdp = ext2_get_group_desc (sb, block_group, NULL);
- if (!gdp) {
- retval = -EIO;
- goto error_out;
- }
+ if (!gdp)
+ return NULL;
+
+ unlock_super(sb);
bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_inode_bitmap), sb->s_blocksize);
+ lock_super(sb);
if (!bh) {
ext2_error (sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %lu",
block_group, (unsigned long) gdp->bg_inode_bitmap);
- retval = -EIO;
+ return NULL;
}
- /*
- * On IO error, just leave a zero in the superblock's block pointer for
- * this group. The IO will be retried next time.
- */
-error_out:
- sb->u.ext2_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
- sb->u.ext2_sb.s_inode_bitmap[bitmap_nr] = bh;
- return retval;
+ return bh;
}
+
/*
* load_inode_bitmap loads the inode bitmap for a blocks group
*
@@ -91,9 +84,9 @@
static int load_inode_bitmap (struct super_block * sb,
unsigned int block_group)
{
- int i, j, retval = 0;
+ int i, j;
unsigned long inode_bitmap_number;
- struct buffer_head * inode_bitmap;
+ struct buffer_head * inode_bitmap, * cached_bitmap = NULL;
/* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *);
+extern int ext2_fsync_file (struct file *, struct dentry *, int);
+extern int ext2_fsync_inode (struct inode *, int);
+extern int ext2_osync_inode (struct inode *, int);
/* ialloc.c */
extern struct inode * ext2_new_inode (const struct inode *, int, int *);
--- include/linux/fs.h.~1~ Wed May 12 16:04:53 1999
+++ include/linux/fs.h Wed May 12 17:09:10 1999
@@ -184,38 +184,35 @@
#define BH_Protected 6 /* 1 if the buffer is protected */
/*
- * Try to keep the most commonly used fields in single cache lines (16
+ * Try to keep the most commonly used fields in single cache lines (32
* bytes) to improve performance. This ordering should be
- * particularly beneficial on 32-bit processors.
- *
- * We use the first 16 bytes for the data which is used in searches
- * over the block hash lists (ie. getblk(), find_buffer() and
- * friends).
- *
- * The second 16 bytes we use for lru buffer scans, as used by
- * sync_buffers() and refill_freelist(). -- sct
+ * particularly beneficial on 32-bit processors. -- sct
*/
struct buffer_head {
/* First cache line: */
struct buffer_head * b_next; /* Hash queue list */
unsigned long b_blocknr; /* block number */
- unsigned long b_size; /* block size */
kdev_t b_dev; /* device (B_FREE = free) */
kdev_t b_rdev; /* Real device */
- unsigned long b_rsector; /* Real buffer location on disk */
struct buffer_head * b_this_page; /* circular list of buffers in one page */
unsigned long b_state; /* buffer state bitmap (see above) */
struct buffer_head * b_next_free;
unsigned int b_count; /* users using this block */
+ unsigned int b_list; /* List that this buffer appears */
/* Non-performance-critical data follows. */
+ unsigned long b_size; /* block size */
+ unsigned long b_rsector; /* Real buffer location on disk */
char * b_data; /* pointer to data block (1024 bytes) */
- unsigned int b_list; /* List that this buffer appears */
unsigned long b_flushtime; /* Time when this (dirty) buffer
* should be written */
struct wait_queue * b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
struct buffer_head * b_prev_free; /* doubly linked list of buffers */
+
+ struct inode * b_inode;
+ struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */
+
struct buffer_head * b_reqnext; /* request queue */