--- drivers/acorn/block/fd1772.c.~1~ Thu Jan 7 16:46:58 1999
+++ drivers/acorn/block/fd1772.c Thu May 27 23:57:19 1999
@@ -1569,7 +1569,7 @@
if (!filp || (filp->f_mode & (2 | OPEN_WRITE_BIT)))
/* if the file is mounted OR (writable now AND writable at open
time) Linus: Does this cover all cases? */
- block_fsync(inode, filp);
+		block_fsync(filp, filp->f_dentry, 0);
if (fd_ref[drive] < 0)
fd_ref[drive] = 0;
--- drivers/block/ataflop.c.~1~ Tue Dec 29 19:23:59 1998
+++ drivers/block/ataflop.c Thu May 27 23:57:20 1999
@@ -1986,7 +1986,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync (filp, filp->f_dentry);
+ block_fsync (filp, filp->f_dentry, 0);
if (fd_ref[drive] < 0)
fd_ref[drive] = 0;
--- drivers/block/floppy.c.~1~ Tue Apr 20 22:33:21 1999
+++ drivers/block/floppy.c Thu May 27 23:57:20 1999
@@ -3677,7 +3677,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync(filp, filp->f_dentry);
+ block_fsync(filp, filp->f_dentry, 0);
if (UDRS->fd_ref < 0)
UDRS->fd_ref=0;
--- drivers/block/swim3.c.~1~ Tue Apr 20 22:33:02 1999
+++ drivers/block/swim3.c Thu May 27 23:57:20 1999
@@ -941,7 +941,7 @@
* sync is required. Otherwise, sync if filp is writable.
*/
if (filp && (filp->f_mode & (2 | OPEN_WRITE_BIT)))
- block_fsync (filp, filp->f_dentry);
+ block_fsync (filp, filp->f_dentry, 0);
fs = &floppy_states[devnum];
sw = fs->swim3;
--- fs/block_dev.c.~1~ Fri Nov 13 18:07:26 1998
+++ fs/block_dev.c Thu May 27 23:57:20 1999
@@ -292,7 +292,7 @@
* since the vma has no handle.
*/
-int block_fsync(struct file *filp, struct dentry *dentry)
+int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
return fsync_dev(dentry->d_inode->i_rdev);
}
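
Every caller above passes 0 for the new argument; the fdatasync path in
the fs/buffer.c hunk below is the one place that passes 1.  A minimal
sketch of the resulting calling convention (the function name is
hypothetical; only the down/fsync/up sequence is taken from the
fs/buffer.c hunk):

	/* Hypothetical caller showing the new fsync method convention:
	 * datasync == 0 asks for a full fsync (data plus inode metadata),
	 * datasync == 1 asks for fdatasync semantics (data only). */
	static int sync_file_example(struct file *file, int datasync)
	{
		struct dentry *dentry = file->f_dentry;
		struct inode *inode = dentry->d_inode;
		int err = -EINVAL;

		if (file->f_op && file->f_op->fsync) {
			down(&inode->i_sem);	/* serialise with writers */
			err = file->f_op->fsync(file, dentry, datasync);
			up(&inode->i_sem);
		}
		return err;
	}
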
--- fs/buffer.c.~1~ Thu May 27 23:54:35 1999
+++ fs/buffer.c Sat May 29 01:29:37 1999
@@ -55,6 +55,8 @@
#define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
number of unused buffer heads */
+#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers)
+
/*
* Hash table mask..
*/
@@ -301,7 +303,7 @@
* filp may be NULL if called via the msync of a vma.
*/
- /* this needs further work, at the moment it is identical to fsync() */
down(&inode->i_sem);
- err = file->f_op->fsync(file, dentry);
+ err = file->f_op->fsync(file, dentry, 1);
up(&inode->i_sem);
+
+/*
+ * Synchronise all the inode's dirty buffers to the disk.
+ *
+ * We have conflicting pressures: we want to make sure that all
+ * initially dirty buffers get waited on, but that any subsequently
+ * dirtied buffers don't. After all, we don't want fsync to last
+ * forever if somebody is actively writing to the file.
+ *
+ * Do this in two main stages: first we copy dirty buffers to a
+ * temporary inode list, queueing the writes as we go. Then we clean
+ * up, waiting for those writes to complete.
+ *
+ * During this second stage, any subsequent updates to the file may end
+ * up refiling the buffer on the original inode's dirty list again, so
+ * there is a chance we will end up with a buffer queued for write but
+ * not yet completed on that list. So, as a final cleanup we go through
+ * the osync code to catch these locked, dirty buffers without requeuing
+ * any newly dirty buffers for write.
+ */
+
+int fsync_inode_buffers(struct inode *inode)
+{
+ struct buffer_head *bh;
+ struct inode tmp;
+ int err = 0, err2;
+
+ INIT_LIST_HEAD(&tmp.i_dirty_buffers);
+
+ while (!list_empty(&inode->i_dirty_buffers)) {
+ bh = BH_ENTRY(inode->i_dirty_buffers.next);
+ list_del(&bh->b_inode_buffers);
+ if (!buffer_dirty(bh) && !buffer_locked(bh))
+ bh->b_inode = NULL;
+ else {
+ bh->b_inode = &tmp;
+ list_add(&bh->b_inode_buffers, &tmp.i_dirty_buffers);
+ bh->b_count++;
+ if (buffer_dirty(bh))
+ ll_rw_block(WRITE, 1, &bh);
+ }
+ }
+
+ while (!list_empty(&tmp.i_dirty_buffers)) {
+ bh = BH_ENTRY(tmp.i_dirty_buffers.prev);
+ remove_inode_queue(bh);
+ wait_on_buffer(bh);
+ if (!buffer_uptodate(bh))
+ err = -EIO;
+ brelse(bh);
+ }
+
+ err2 = osync_inode_buffers(inode);
+
+ if (err)
+ return err;
+ else
+ return err2;
+}
+
+
+/*
+ * osync is designed to support O_SYNC I/O. It waits synchronously for
+ * all already-submitted I/O to complete, but does not queue any new
+ * writes to the disk.
+ *
+ * To do O_SYNC writes, just queue the buffer writes with ll_rw_block as
+ * you dirty the buffers, and then use osync_inode_buffers to wait for
+ * completion. Any other dirty buffers which are not yet queued for
+ * write will not be flushed to disk by the osync.
+ */
+
+int osync_inode_buffers(struct inode *inode)
+{
+ struct buffer_head *bh;
+ struct list_head *list;
+ int err = 0;
+
+ repeat:
+
+ for (list = inode->i_dirty_buffers.prev;
+ bh = BH_ENTRY(list), list != &inode->i_dirty_buffers;
+ list = bh->b_inode_buffers.prev) {
+ if (buffer_locked(bh)) {
+ bh->b_count++;
+ wait_on_buffer(bh);
+			if (!buffer_uptodate(bh))
+				err = -EIO;
+			brelse(bh);
+ goto repeat;
+ }
+ }
+ return err;
+}
+
+
+/*
+ * Invalidate any and all dirty buffers on a given inode. We are
+ * probably unmounting the fs, but that doesn't mean we have already
+ * done a sync(). Just drop the buffers from the inode list.
+ */
+
+void invalidate_inode_buffers(struct inode *inode)
+{
+ struct list_head *list, *next;
+
+ list = inode->i_dirty_buffers.next;
+ while (list != &inode->i_dirty_buffers) {
+ next = list->next;
+ remove_inode_queue(BH_ENTRY(list));
+ list = next;
+ }
+}
+
+
struct buffer_head * find_buffer(kdev_t dev, int block, int size)
{
struct buffer_head * next;
@@ -557,7 +695,7 @@
if (!next)
break;
next = tmp->b_next;
- if (tmp->b_blocknr != block || tmp->b_size != size || tmp->b_dev != dev)
+ if (tmp->b_blocknr != block || tmp->b_dev != dev || tmp->b_size != size)
continue;
next = tmp;
break;
@@ -570,8 +708,7 @@
* As we don't lock buffers (unless we are reading them, that is),
* something might happen to it while we sleep (ie a read-error
* will force it bad). This shouldn't really happen currently, but
- * the code is ready.
- */
+ * the code is ready. */
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
struct buffer_head * bh;
@@ -640,6 +777,7 @@
wait_on_buffer(bh);
bhnext->b_count--;
if (bh->b_dev == dev && bh->b_size != size) {
+ remove_inode_queue(bh);
clear_bit(BH_Dirty, &bh->b_state);
clear_bit(BH_Uptodate, &bh->b_state);
clear_bit(BH_Req, &bh->b_state);
@@ -776,13 +914,14 @@
dispose = BUF_CLEAN;
if(dispose != buf->b_list) {
file_buffer(buf, dispose);
- if(dispose == BUF_DIRTY) {
+ switch (dispose) {
+ case BUF_DIRTY: {
int too_many = (nr_buffers * bdf_prm.b_un.nfract/100);
/* This buffer is dirty, maybe we need to start flushing.
* If too high a percentage of the buffers are dirty...
*/
- if (nr_buffers_type[BUF_DIRTY] > too_many)
+ if (nr_buffers_type[BUF_DIRTY] > too_many)
wakeup_bdflush(1);
/* If this is a loop device, and
@@ -792,6 +931,12 @@
if (MAJOR(buf->b_dev) == LOOP_MAJOR &&
nr_buffers_type[BUF_DIRTY]*2>nr_buffers)
wakeup_bdflush(1);
+ break;
+ }
+ case BUF_CLEAN:
+ remove_inode_queue(buf);
+		default:
+			break;
}
}
}
@@ -821,6 +965,7 @@
*/
void __bforget(struct buffer_head * buf)
{
+ remove_inode_queue(buf);
if (buf->b_count != 1 || buffer_locked(buf)) {
__brelse(buf);
return;
@@ -1436,6 +1581,7 @@
tmp = tmp->b_this_page;
nr_buffers--;
remove_from_queues(p);
+ remove_inode_queue(p);
put_unused_buffer_head(p);
} while (tmp != bh);
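
The O_SYNC pattern described in the osync_inode_buffers() comment above,
as a short hypothetical sketch (the helper is illustrative, not from the
patch; ll_rw_block() and osync_inode_buffers() are real):

	/* Hypothetical O_SYNC write path: each buffer is queued for
	 * write as it is dirtied, then we wait for the queued I/O.
	 * Buffers dirtied after the submission loop are deliberately
	 * left unflushed, exactly as the osync comment specifies. */
	static int example_osync_write(struct inode *inode,
				       struct buffer_head **bhs, int nr)
	{
		int i;

		for (i = 0; i < nr; i++)
			ll_rw_block(WRITE, 1, &bhs[i]);	/* submit now */

		return osync_inode_buffers(inode);	/* wait only */
	}
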
--- include/linux/ext2_fs.h.~1~
+++ include/linux/ext2_fs.h
 /* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *);
+extern int ext2_fsync_file (struct file *, struct dentry *, int);
+extern int ext2_fsync_inode (struct inode *, int);
+extern int ext2_osync_inode (struct inode *, int);
/* ialloc.c */
extern struct inode * ext2_new_inode (const struct inode *, int, int *);
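
The renamed prototypes suggest the intended layering: a filesystem's
fsync method reduces to the generic primitives added in fs/buffer.c.  A
hedged sketch of such a method (hypothetical name and body; the patch's
actual ext2 implementation is not part of this excerpt):

	/* Hypothetical fsync method built on the new primitives. */
	static int example_fsync_file(struct file *file,
				      struct dentry *dentry, int datasync)
	{
		struct inode *inode = dentry->d_inode;
		int err;

		/* Queue and wait on the inode's dirty data buffers. */
		err = fsync_inode_buffers(inode);

		/* A full fsync would also write the inode's own metadata
		 * here; fdatasync (datasync != 0) may skip that unless
		 * the metadata is needed to find the data. */

		return err;
	}
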
--- include/linux/fs.h.~1~ Thu May 27 07:31:34 1999
+++ include/linux/fs.h Fri May 28 00:12:46 1999
@@ -184,38 +184,35 @@
#define BH_Protected 6 /* 1 if the buffer is protected */
/*
- * Try to keep the most commonly used fields in single cache lines (16
+ * Try to keep the most commonly used fields in single cache lines (32
* bytes) to improve performance. This ordering should be
- * particularly beneficial on 32-bit processors.
- *
- * We use the first 16 bytes for the data which is used in searches
- * over the block hash lists (ie. getblk(), find_buffer() and
- * friends).
- *
- * The second 16 bytes we use for lru buffer scans, as used by
- * sync_buffers() and refill_freelist(). -- sct
+ * particularly beneficial on 32-bit processors. -- sct
*/
struct buffer_head {
/* First cache line: */
struct buffer_head * b_next; /* Hash queue list */
unsigned long b_blocknr; /* block number */
- unsigned long b_size; /* block size */
kdev_t b_dev; /* device (B_FREE = free) */
kdev_t b_rdev; /* Real device */
- unsigned long b_rsector; /* Real buffer location on disk */
struct buffer_head * b_this_page; /* circular list of buffers in one page */
unsigned long b_state; /* buffer state bitmap (see above) */
struct buffer_head * b_next_free;
unsigned int b_count; /* users using this block */
+	unsigned int b_list;		/* List that this buffer appears on */
/* Non-performance-critical data follows. */
+ unsigned long b_size; /* block size */
+ unsigned long b_rsector; /* Real buffer location on disk */
char * b_data; /* pointer to data block (1024 bytes) */
- unsigned int b_list; /* List that this buffer appears */
unsigned long b_flushtime; /* Time when this (dirty) buffer
* should be written */
struct wait_queue * b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
struct buffer_head * b_prev_free; /* doubly linked list of buffers */
+
+ struct inode * b_inode;
+ struct list_head b_inode_buffers; /* doubly linked list of inode dirty buffers */
+
struct buffer_head * b_reqnext; /* request queue */
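
The b_inode and b_inode_buffers fields added above are what
remove_inode_queue(), called throughout the fs/buffer.c hunks, has to
manipulate.  Its definition is not part of this excerpt; presumably it
amounts to something like the following (an assumption, not code from
the patch):

	/* Assumed helper: detach a buffer from its inode's dirty list,
	 * if it is on one.  Harmless for buffers that were never
	 * queued on an inode. */
	static inline void remove_inode_queue(struct buffer_head *bh)
	{
		if (bh->b_inode) {
			list_del(&bh->b_inode_buffers);
			bh->b_inode = NULL;
		}
	}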