patch-2.4.6 linux/fs/buffer.c
- Lines: 386
- Date: Sat Jun 30 10:44:32 2001
- Orig file: v2.4.5/linux/fs/buffer.c
- Orig date: Fri May 25 17:57:46 2001
diff -u --recursive --new-file v2.4.5/linux/fs/buffer.c linux/fs/buffer.c
@@ -61,7 +61,7 @@
#define BUFSIZE_INDEX(X) ((int) buffersize_index[(X)>>9])
#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
-#define NR_RESERVED (2*MAX_BUF_PER_PAGE)
+#define NR_RESERVED (10*MAX_BUF_PER_PAGE)
#define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
number of unused buffer heads */
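For scale: assuming the common 4096-byte PAGE_CACHE_SIZE, MAX_BUF_PER_PAGE is
4096/512 = 8, so NR_RESERVED grows from 2*8 = 16 to 10*8 = 80 reserved buffer
heads, and MAX_UNUSED_BUFFERS correspondingly from 36 to 100.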
@@ -161,144 +161,131 @@
atomic_dec(&bh->b_count);
}
-/* Call sync_buffers with wait!=0 to ensure that the call does not
- * return until all buffer writes have completed. Sync() may return
- * before the writes have finished; fsync() may not.
- */
+/* End-of-write handler.. Just mark it up-to-date and unlock the buffer. */
+static void end_buffer_write(struct buffer_head *bh, int uptodate)
+{
+ mark_buffer_uptodate(bh, uptodate);
+ unlock_buffer(bh);
+}
-/* Godamity-damn. Some buffers (bitmaps for filesystems)
- * spontaneously dirty themselves without ever brelse being called.
- * We will ultimately want to put these in a separate list, but for
- * now we search all of the lists for dirty buffers.
+/*
+ * The buffers have been marked clean and locked. Just submit the dang
+ * things..
+ *
+ * We'll wait for the first one of them - "sync" is not exactly
+ * performance-critical, and this makes us not hog the IO subsystem
+ * completely, while still allowing for a fair amount of concurrent IO.
*/
-static int sync_buffers(kdev_t dev, int wait)
+static void write_locked_buffers(struct buffer_head **array, unsigned int count)
{
- int i, retry, pass = 0, err = 0;
- struct buffer_head * bh, *next;
-
- /* One pass for no-wait, three for wait:
- * 0) write out all dirty, unlocked buffers;
- * 1) write out all dirty buffers, waiting if locked;
- * 2) wait for completion by waiting for all buffers to unlock.
- */
+ struct buffer_head *wait = *array;
+ atomic_inc(&wait->b_count);
do {
- retry = 0;
-
- /* We search all lists as a failsafe mechanism, not because we expect
- * there to be dirty buffers on any of the other lists.
- */
-repeat:
- spin_lock(&lru_list_lock);
- bh = lru_list[BUF_DIRTY];
- if (!bh)
- goto repeat2;
-
- for (i = nr_buffers_type[BUF_DIRTY]*2 ; i-- > 0 ; bh = next) {
- next = bh->b_next_free;
+ struct buffer_head * bh = *array++;
+ bh->b_end_io = end_buffer_write;
+ submit_bh(WRITE, bh);
+ } while (--count);
+ wait_on_buffer(wait);
+ atomic_dec(&wait->b_count);
+}
- if (!lru_list[BUF_DIRTY])
- break;
- if (dev && bh->b_dev != dev)
- continue;
- if (buffer_locked(bh)) {
- /* Buffer is locked; skip it unless wait is
- * requested AND pass > 0.
- */
- if (!wait || !pass) {
- retry = 1;
- continue;
- }
- atomic_inc(&bh->b_count);
- spin_unlock(&lru_list_lock);
- wait_on_buffer (bh);
- atomic_dec(&bh->b_count);
- goto repeat;
- }
+#define NRSYNC (32)
+static void write_unlocked_buffers(kdev_t dev)
+{
+ struct buffer_head *next;
+ struct buffer_head *array[NRSYNC];
+ unsigned int count;
+ int nr;
- /* If an unlocked buffer is not uptodate, there has
- * been an IO error. Skip it.
- */
- if (wait && buffer_req(bh) && !buffer_locked(bh) &&
- !buffer_dirty(bh) && !buffer_uptodate(bh)) {
- err = -EIO;
- continue;
- }
+repeat:
+ spin_lock(&lru_list_lock);
+ next = lru_list[BUF_DIRTY];
+ nr = nr_buffers_type[BUF_DIRTY] * 2;
+ count = 0;
+ while (next && --nr >= 0) {
+ struct buffer_head * bh = next;
+ next = bh->b_next_free;
- /* Don't write clean buffers. Don't write ANY buffers
- * on the third pass.
- */
- if (!buffer_dirty(bh) || pass >= 2)
+ if (dev && bh->b_dev != dev)
+ continue;
+ if (test_and_set_bit(BH_Lock, &bh->b_state))
+ continue;
+ if (atomic_set_buffer_clean(bh)) {
+ __refile_buffer(bh);
+ array[count++] = bh;
+ if (count < NRSYNC)
continue;
- atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
- ll_rw_block(WRITE, 1, &bh);
- atomic_dec(&bh->b_count);
- retry = 1;
+ write_locked_buffers(array, count);
goto repeat;
}
+ unlock_buffer(bh);
+ }
+ spin_unlock(&lru_list_lock);
- repeat2:
- bh = lru_list[BUF_LOCKED];
- if (!bh) {
- spin_unlock(&lru_list_lock);
- break;
- }
- for (i = nr_buffers_type[BUF_LOCKED]*2 ; i-- > 0 ; bh = next) {
- next = bh->b_next_free;
+ if (count)
+ write_locked_buffers(array, count);
+}
- if (!lru_list[BUF_LOCKED])
- break;
- if (dev && bh->b_dev != dev)
- continue;
- if (buffer_locked(bh)) {
- /* Buffer is locked; skip it unless wait is
- * requested AND pass > 0.
- */
- if (!wait || !pass) {
- retry = 1;
- continue;
- }
- atomic_inc(&bh->b_count);
- spin_unlock(&lru_list_lock);
- wait_on_buffer (bh);
- spin_lock(&lru_list_lock);
- atomic_dec(&bh->b_count);
- goto repeat2;
- }
+static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
+{
+ struct buffer_head * next;
+ int nr;
+
+repeat:
+ spin_lock(&lru_list_lock);
+ next = lru_list[index];
+ nr = nr_buffers_type[index] * 2;
+ while (next && --nr >= 0) {
+ struct buffer_head *bh = next;
+ next = bh->b_next_free;
+
+ if (!buffer_locked(bh)) {
+ if (refile)
+ __refile_buffer(bh);
+ continue;
}
- spin_unlock(&lru_list_lock);
+ if (dev && bh->b_dev != dev)
+ continue;
- /* If we are waiting for the sync to succeed, and if any dirty
- * blocks were written, then repeat; on the second pass, only
- * wait for buffers being written (do not pass to write any
- * more buffers on the second pass).
- */
- } while (wait && retry && ++pass<=2);
- return err;
+ atomic_inc(&bh->b_count);
+ spin_unlock(&lru_list_lock);
+ wait_on_buffer (bh);
+ atomic_dec(&bh->b_count);
+ goto repeat;
+ }
+ spin_unlock(&lru_list_lock);
+ return 0;
}
-void sync_dev(kdev_t dev)
+/* Call sync_buffers with wait!=0 to ensure that the call does not
+ * return until all buffer writes have completed. Sync() may return
+ * before the writes have finished; fsync() may not.
+ */
+
+/* Godamity-damn. Some buffers (bitmaps for filesystems)
+ * spontaneously dirty themselves without ever brelse being called.
+ * We will ultimately want to put these in a separate list, but for
+ * now we search all of the lists for dirty buffers.
+ */
+static int sync_buffers(kdev_t dev, int wait)
{
- sync_supers(dev);
- sync_inodes(dev);
- DQUOT_SYNC(dev);
- /* sync all the dirty buffers out to disk only _after_ all the
- high level layers finished generated buffer dirty data
- (or we'll return with some buffer still dirty on the blockdevice
- so breaking the semantics of this call) */
- sync_buffers(dev, 0);
- /*
- * FIXME(eric) we need to sync the physical devices here.
- * This is because some (scsi) controllers have huge amounts of
- * cache onboard (hundreds of Mb), and we need to instruct
- * them to commit all of the dirty memory to disk, and we should
- * not return until this has happened.
- *
- * This would need to get implemented by going through the assorted
- * layers so that each block major number can be synced, and this
- * would call down into the upper and mid-layer scsi.
+ int err = 0;
+
+ /* One pass for no-wait, three for wait:
+ * 0) write out all dirty, unlocked buffers;
+ * 1) wait for all dirty locked buffers;
+ * 2) write out all dirty, unlocked buffers;
+ * 2) wait for completion by waiting for all buffers to unlock.
*/
+ write_unlocked_buffers(dev);
+ if (wait) {
+ err = wait_for_locked_buffers(dev, BUF_DIRTY, 0);
+ write_unlocked_buffers(dev);
+ err |= wait_for_locked_buffers(dev, BUF_LOCKED, 1);
+ }
+ return err;
}
int fsync_super(struct super_block *sb)
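In outline, the rewritten sync path collects dirty buffers into fixed-size
batches, locks and cleans them under lru_list_lock, submits each batch, and
throttles itself by waiting on only the first buffer of every batch. A
simplified sketch of that batching idea, using only the buffer-head calls
visible in the hunk above (an illustration, not the literal kernel code):

	static void flush_one_batch(struct buffer_head **array, unsigned int count)
	{
		struct buffer_head *first = array[0];

		atomic_inc(&first->b_count);		/* pin the buffer we will wait on    */
		do {
			struct buffer_head *bh = *array++;
			bh->b_end_io = end_buffer_write; /* mark uptodate + unlock on I/O end */
			submit_bh(WRITE, bh);		/* queue the locked, cleaned buffer  */
		} while (--count);
		wait_on_buffer(first);			/* throttle: wait for the first only */
		atomic_dec(&first->b_count);
	}

When wait != 0, sync_buffers() then drains BUF_DIRTY, rewrites anything that
was re-dirtied in the meantime, and finally waits on BUF_LOCKED, refiling
buffers as they complete.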
@@ -331,6 +318,15 @@
return sync_buffers(dev, 1);
}
+/*
+ * There's no real reason to pretend we should
+ * ever do anything differently
+ */
+void sync_dev(kdev_t dev)
+{
+ fsync_dev(dev);
+}
+
asmlinkage long sys_sync(void)
{
fsync_dev(0);
@@ -646,8 +642,8 @@
/* Another device? */
if (bh->b_dev != dev)
continue;
- /* Part of a mapping? */
- if (bh->b_page->mapping)
+ /* Not hashed? */
+ if (!bh->b_pprev)
continue;
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
@@ -711,6 +707,9 @@
bh_next = bh->b_next_free;
if (bh->b_dev != dev || bh->b_size == size)
continue;
+ /* Unhashed? */
+ if (!bh->b_pprev)
+ continue;
if (buffer_locked(bh)) {
atomic_inc(&bh->b_count);
spin_unlock(&lru_list_lock);
@@ -759,7 +758,7 @@
{
balance_dirty(NODEV);
if (free_shortage())
- page_launder(GFP_BUFFER, 0);
+ page_launder(GFP_NOFS, 0);
if (!grow_buffers(size)) {
wakeup_bdflush(1);
current->policy |= SCHED_YIELD;
@@ -1220,11 +1219,11 @@
}
spin_unlock(&unused_list_lock);
- /* This is critical. We can't swap out pages to get
- * more buffer heads, because the swap-out may need
- * more buffer-heads itself. Thus SLAB_BUFFER.
+ /* This is critical. We can't call out to the FS
+ * to get more buffer heads, because the FS may need
+ * more buffer-heads itself. Thus SLAB_NOFS.
*/
- if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
+ if((bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS)) != NULL) {
bh->b_blocknr = -1;
bh->b_this_page = NULL;
return bh;
@@ -1348,11 +1347,9 @@
*/
run_task_queue(&tq_disk);
- /*
- * Set our state for sleeping, then check again for buffer heads.
- * This ensures we won't miss a wake_up from an interrupt.
- */
- wait_event(buffer_wait, nr_unused_buffer_heads >= MAX_BUF_PER_PAGE);
+ current->policy |= SCHED_YIELD;
+ __set_current_state(TASK_RUNNING);
+ schedule();
goto try_again;
}
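In other words, when the reserved pool is exhausted the allocator no longer
sleeps on buffer_wait until MAX_BUF_PER_PAGE unused heads appear; it kicks the
disk task queue, yields the CPU once, and retries. Annotated from the lines
above (illustration only):

	current->policy |= SCHED_YIELD;		/* let other runnable tasks make progress */
	__set_current_state(TASK_RUNNING);
	schedule();
	goto try_again;				/* then retry the whole allocation loop    */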
@@ -2242,7 +2239,7 @@
return 0;
}
- page = alloc_page(GFP_BUFFER);
+ page = alloc_page(GFP_NOFS);
if (!page)
goto out;
LockPage(page);
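The GFP_BUFFER -> GFP_NOFS and SLAB_BUFFER -> SLAB_NOFS conversions above all
express the constraint spelled out in the SLAB_NOFS comment: these allocations
happen inside the buffer/filesystem layer itself, so the allocator may block,
but it must not call back into filesystem code to make progress. A minimal
illustration of the pattern, taken from the call sites in this patch:

	/* From buffer-cache / filesystem context: blocking is fine,
	 * re-entering the filesystem to reclaim memory is not.    */
	struct buffer_head *bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS);
	struct page *page = alloc_page(GFP_NOFS);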
@@ -2304,7 +2301,7 @@
* 1 - start IO for dirty buffers
* 2 - wait for completion of locked buffers
*/
-static void sync_page_buffers(struct buffer_head *bh, int wait)
+static void sync_page_buffers(struct buffer_head *bh, unsigned int gfp_mask)
{
struct buffer_head * tmp = bh;
@@ -2312,7 +2309,7 @@
struct buffer_head *p = tmp;
tmp = tmp->b_this_page;
if (buffer_locked(p)) {
- if (wait > 1)
+ if (gfp_mask & __GFP_WAIT)
__wait_on_buffer(p);
} else if (buffer_dirty(p))
ll_rw_block(WRITE, 1, &p);
@@ -2336,7 +2333,7 @@
* obtain a reference to a buffer head within a page. So we must
* lock out all of these paths to cleanly toss the page.
*/
-int try_to_free_buffers(struct page * page, int wait)
+int try_to_free_buffers(struct page * page, unsigned int gfp_mask)
{
struct buffer_head * tmp, * bh = page->buffers;
int index = BUFSIZE_INDEX(bh->b_size);
@@ -2387,10 +2384,10 @@
spin_unlock(&free_list[index].lock);
write_unlock(&hash_table_lock);
spin_unlock(&lru_list_lock);
- if (wait) {
- sync_page_buffers(bh, wait);
+ if (gfp_mask & __GFP_IO) {
+ sync_page_buffers(bh, gfp_mask);
/* We waited synchronously, so we can free the buffers. */
- if (wait > 1 && !loop) {
+ if ((gfp_mask & __GFP_WAIT) && !loop) {
loop = 1;
goto cleaned_buffers_try_again;
}
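Taken together with the sync_page_buffers() change above, freeing a page's
buffers is now gated by the caller's allocation flags instead of an ad-hoc
"wait" level. Condensed from this hunk (annotation only, not the literal
kernel code):

	if (gfp_mask & __GFP_IO) {			/* caller may start I/O            */
		sync_page_buffers(bh, gfp_mask);	/* write out dirty buffers; wait   */
							/* on locked ones only if the      */
							/* caller also passed __GFP_WAIT   */
		if ((gfp_mask & __GFP_WAIT) && !loop) {
			loop = 1;			/* we were allowed to block, so    */
			goto cleaned_buffers_try_again;	/* the buffers should be clean     */
		}					/* now: retry the free exactly once */
	}

So a caller that passes neither __GFP_IO nor __GFP_WAIT (an atomic allocation,
for instance) can no longer trigger write-out or block here at all.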