patch-2.4.4 linux/fs/inode.c

diff -u --recursive --new-file v2.4.3/linux/fs/inode.c linux/fs/inode.c
@@ -13,6 +13,8 @@
 #include <linux/quotaops.h>
 #include <linux/slab.h>
 #include <linux/cache.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
 
 /*
  * New inode.c implementation.
@@ -131,28 +133,26 @@
 {
 	struct super_block * sb = inode->i_sb;
 
-	if (sb) {
-		/* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
-		if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
-			if (sb->s_op && sb->s_op->dirty_inode)
-				sb->s_op->dirty_inode(inode);
-		}
+	/* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
+	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+		if (sb->s_op && sb->s_op->dirty_inode)
+			sb->s_op->dirty_inode(inode);
+	}
 
-		/* avoid the locking if we can */
-		if ((inode->i_state & flags) == flags)
-			return;
+	/* avoid the locking if we can */
+	if ((inode->i_state & flags) == flags)
+		return;
 
-		spin_lock(&inode_lock);
-		if ((inode->i_state & flags) != flags) {
-			inode->i_state |= flags;
-			/* Only add valid (ie hashed) inodes to the dirty list */
-			if (!list_empty(&inode->i_hash)) {
-				list_del(&inode->i_list);
-				list_add(&inode->i_list, &sb->s_dirty);
-			}
+	spin_lock(&inode_lock);
+	if ((inode->i_state & flags) != flags) {
+		inode->i_state |= flags;
+		/* Only add valid (ie hashed) inodes to the dirty list */
+		if (!(inode->i_state & I_LOCK) && !list_empty(&inode->i_hash)) {
+			list_del(&inode->i_list);
+			list_add(&inode->i_list, &sb->s_dirty);
 		}
-		spin_unlock(&inode_lock);
 	}
+	spin_unlock(&inode_lock);
 }
 
 static void __wait_on_inode(struct inode * inode)
@@ -179,7 +179,7 @@
 
 static inline void write_inode(struct inode *inode, int sync)
 {
-	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode)
+	if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
 		inode->i_sb->s_op->write_inode(inode, sync);
 }
 
@@ -190,13 +190,53 @@
 		return;
 	}
 	atomic_inc(&inode->i_count);
-	if (!(inode->i_state & I_DIRTY)) {
+	if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
 		list_del(&inode->i_list);
 		list_add(&inode->i_list, &inode_in_use);
 	}
 	inodes_stat.nr_unused--;
 }
 
+static inline void __sync_one(struct inode *inode, int sync)
+{
+	unsigned dirty;
+
+	list_del(&inode->i_list);
+	list_add(&inode->i_list, &inode->i_sb->s_locked_inodes);
+
+	if (inode->i_state & I_LOCK)
+		BUG();
+
+	/* Set I_LOCK, reset I_DIRTY */
+	dirty = inode->i_state & I_DIRTY;
+	inode->i_state |= I_LOCK;
+	inode->i_state &= ~I_DIRTY;
+	spin_unlock(&inode_lock);
+
+	filemap_fdatasync(inode->i_mapping);
+
+	/* Don't write the inode if only I_DIRTY_PAGES was set */
+	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
+		write_inode(inode, sync);
+
+	filemap_fdatawait(inode->i_mapping);
+
+	spin_lock(&inode_lock);
+	inode->i_state &= ~I_LOCK;
+	if (!(inode->i_state & I_FREEING)) {
+		struct list_head *to;
+		if (inode->i_state & I_DIRTY)
+			to = &inode->i_sb->s_dirty;
+		else if (atomic_read(&inode->i_count))
+			to = &inode_in_use;
+		else
+			to = &inode_unused;
+		list_del(&inode->i_list);
+		list_add(&inode->i_list, to);
+	}
+	wake_up(&inode->i_wait);
+}
+
 static inline void sync_one(struct inode *inode, int sync)
 {
 	if (inode->i_state & I_LOCK) {
@@ -206,38 +246,77 @@
 		iput(inode);
 		spin_lock(&inode_lock);
 	} else {
-		unsigned dirty;
-
-		list_del(&inode->i_list);
-		list_add(&inode->i_list, atomic_read(&inode->i_count)
-							? &inode_in_use
-							: &inode_unused);
-		/* Set I_LOCK, reset I_DIRTY */
-		dirty = inode->i_state & I_DIRTY;
-		inode->i_state |= I_LOCK;
-		inode->i_state &= ~I_DIRTY;
-		spin_unlock(&inode_lock);
-
-		filemap_fdatasync(inode->i_mapping);
+		__sync_one(inode, sync);
+	}
+}
 
-		/* Don't write the inode if only I_DIRTY_PAGES was set */
-		if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
-			write_inode(inode, sync);
+static inline void sync_list(struct list_head *head)
+{
+	struct list_head * tmp;
 
-		filemap_fdatawait(inode->i_mapping);
+	while ((tmp = head->prev) != head) 
+		__sync_one(list_entry(tmp, struct inode, i_list), 0);
+}
 
+static inline int wait_on_dirty(struct list_head *head)
+{
+	struct list_head * tmp;
+	list_for_each(tmp, head) {
+		struct inode *inode = list_entry(tmp, struct inode, i_list);
+		if (!(inode->i_state & I_DIRTY))
+			continue;
+		__iget(inode);
+		spin_unlock(&inode_lock);
+		__wait_on_inode(inode);
+		iput(inode);
 		spin_lock(&inode_lock);
-		inode->i_state &= ~I_LOCK;
-		wake_up(&inode->i_wait);
+		return 1;
 	}
+	return 0;
 }
 
-static inline void sync_list(struct list_head *head)
+static inline void wait_on_locked(struct list_head *head)
 {
 	struct list_head * tmp;
+	while ((tmp = head->prev) != head) {
+		struct inode *inode = list_entry(tmp, struct inode, i_list);
+		__iget(inode);
+		spin_unlock(&inode_lock);
+		__wait_on_inode(inode);
+		iput(inode);
+		spin_lock(&inode_lock);
+	}
+}
 
-	while ((tmp = head->prev) != head)
-		sync_one(list_entry(tmp, struct inode, i_list), 0);
+static inline int try_to_sync_unused_list(struct list_head *head)
+{
+	struct list_head *tmp = head;
+	struct inode *inode;
+
+	while ((tmp = tmp->prev) != head) {
+		inode = list_entry(tmp, struct inode, i_list);
+
+		if (!atomic_read(&inode->i_count)) {
+			/* 
+			 * We're under PF_MEMALLOC here, and syncing the 
+			 * inode may have to allocate memory. To avoid
+			 * running into an OOM deadlock, we write one 
+			 * inode synchronously and stop syncing in case 
+			 * we're under freepages.low
+			 */
+
+			int sync = nr_free_pages() < freepages.low;
+			__sync_one(inode, sync);
+			if (sync) 
+				return 0;
+			/* 
+			 * __sync_one moved the inode to another list,
+			 * so we have to start looking from the list head.
+			 */
+			tmp = head;
+		}
+	}
+	return 1;
 }
 
 /**
@@ -247,7 +326,31 @@
  *	sync_inodes goes through the super block's dirty list, 
  *	writes them out, and puts them back on the normal list.
  */
+
+/*
+ * caller holds exclusive lock on sb->s_umount
+ */
  
+void sync_inodes_sb(struct super_block *sb)
+{
+	spin_lock(&inode_lock);
+	sync_list(&sb->s_dirty);
+	wait_on_locked(&sb->s_locked_inodes);
+	spin_unlock(&inode_lock);
+}
+
+void sync_unlocked_inodes(void)
+{
+	struct super_block * sb = sb_entry(super_blocks.next);
+	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
+		if (!list_empty(&sb->s_dirty)) {
+			spin_lock(&inode_lock);
+			sync_list(&sb->s_dirty);
+			spin_unlock(&inode_lock);
+		}
+	}
+}
+
 void sync_inodes(kdev_t dev)
 {
 	struct super_block * sb = sb_entry(super_blocks.next);
@@ -255,31 +358,36 @@
 	/*
 	 * Search the super_blocks array for the device(s) to sync.
 	 */
-	spin_lock(&inode_lock);
 	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
 		if (!sb->s_dev)
 			continue;
 		if (dev && sb->s_dev != dev)
 			continue;
-
-		sync_list(&sb->s_dirty);
-
+		down_read(&sb->s_umount);
+		if (sb->s_dev && (sb->s_dev == dev || !dev)) {
+			spin_lock(&inode_lock);
+			do {
+				sync_list(&sb->s_dirty);
+			} while (wait_on_dirty(&sb->s_locked_inodes));
+			spin_unlock(&inode_lock);
+		}
+		up_read(&sb->s_umount);
 		if (dev)
 			break;
 	}
-	spin_unlock(&inode_lock);
 }
 
 /*
  * Called with the spinlock already held..
  */
-static void sync_all_inodes(void)
+static void try_to_sync_unused_inodes(void)
 {
 	struct super_block * sb = sb_entry(super_blocks.next);
 	for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
 		if (!sb->s_dev)
 			continue;
-		sync_list(&sb->s_dirty);
+		if (!try_to_sync_unused_list(&sb->s_dirty))
+			break;
 	}
 }
 
@@ -476,6 +584,7 @@
 	busy = invalidate_list(&inode_in_use, sb, &throw_away);
 	busy |= invalidate_list(&inode_unused, sb, &throw_away);
 	busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+	busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
 	spin_unlock(&inode_lock);
 
 	dispose_list(&throw_away);
@@ -503,13 +612,12 @@
 {
 	LIST_HEAD(list);
 	struct list_head *entry, *freeable = &list;
-	int count = 0;
+	int count = 0, synced = 0;
 	struct inode * inode;
 
 	spin_lock(&inode_lock);
-	/* go simple and safe syncing everything before starting */
-	sync_all_inodes();
 
+free_unused:
 	entry = inode_unused.prev;
 	while (entry != &inode_unused)
 	{
@@ -517,7 +625,7 @@
 
 		entry = entry->prev;
 		inode = INODE(tmp);
-		if (inode->i_state & (I_FREEING|I_CLEAR))
+		if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
 			BUG();
 		if (!CAN_UNUSE(inode))
 			continue;
@@ -536,6 +644,20 @@
 	spin_unlock(&inode_lock);
 
 	dispose_list(freeable);
+
+	/* 
+	 * If we freed enough clean inodes, avoid writing 
+	 * dirty ones. Also give up if we already tried to
+	 * sync dirty inodes.
+	 */
+	if (!goal || synced)
+		return;
+	
+	synced = 1;
+
+	spin_lock(&inode_lock);
+	try_to_sync_unused_inodes();
+	goto free_unused;
 }
 
 void shrink_icache_memory(int priority, int gfp_mask)
@@ -886,10 +1008,9 @@
 				BUG();
 		} else {
 			if (!list_empty(&inode->i_hash)) {
-				if (!(inode->i_state & I_DIRTY)) {
+				if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
 					list_del(&inode->i_list);
-					list_add(&inode->i_list,
-						 &inode_unused);
+					list_add(&inode->i_list, &inode_unused);
 				}
 				inodes_stat.nr_unused++;
 				spin_unlock(&inode_lock);
@@ -1032,23 +1153,25 @@
 	/* We have to be protected against other CPUs */
 	spin_lock(&inode_lock);
  
-	for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
+	list_for_each(act_head, &inode_in_use) {
 		inode = list_entry(act_head, struct inode, i_list);
-		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
-			continue;
-		remove_inode_dquot_ref(inode, type, &tofree_head);
+		if (inode->i_sb == sb && IS_QUOTAINIT(inode))
+			remove_inode_dquot_ref(inode, type, &tofree_head);
 	}
-	for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
+	list_for_each(act_head, &inode_unused) {
 		inode = list_entry(act_head, struct inode, i_list);
-		if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
-			continue;
-		remove_inode_dquot_ref(inode, type, &tofree_head);
+		if (inode->i_sb == sb && IS_QUOTAINIT(inode))
+			remove_inode_dquot_ref(inode, type, &tofree_head);
 	}
-	for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
+	list_for_each(act_head, &sb->s_dirty) {
 		inode = list_entry(act_head, struct inode, i_list);
-		if (!IS_QUOTAINIT(inode))
-			continue;
-  		remove_inode_dquot_ref(inode, type, &tofree_head);
+		if (IS_QUOTAINIT(inode))
+			remove_inode_dquot_ref(inode, type, &tofree_head);
+	}
+	list_for_each(act_head, &sb->s_locked_inodes) {
+		inode = list_entry(act_head, struct inode, i_list);
+		if (IS_QUOTAINIT(inode))
+			remove_inode_dquot_ref(inode, type, &tofree_head);
 	}
 	spin_unlock(&inode_lock);
 

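The core of this patch is the handshake between __sync_one() and __wait_on_inode(): an inode under writeback is moved to the new per-superblock s_locked_inodes list and marked I_LOCK while inode_lock is held, the spinlock is dropped for the blocking filemap_fdatasync()/write_inode()/filemap_fdatawait() calls, and only afterwards is I_LOCK cleared, the inode requeued, and i_wait woken so that wait_on_dirty() and wait_on_locked() can make progress. The sketch below is a minimal user-space analog of that protocol, assuming a pthread mutex and condition variable in place of inode_lock and the per-inode wait queue; the names fake_inode, do_writeback, syncer and waiter are made up for illustration and the flag values are arbitrary.

/*
 * Minimal user-space analog of the __sync_one()/__wait_on_inode()
 * protocol above.  fake_inode, do_writeback, syncer and waiter are
 * hypothetical names; the kernel uses the inode_lock spinlock and a
 * per-inode wait queue, not a mutex and condition variable.
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define I_DIRTY	0x01	/* inode (or its pages) needs writing */
#define I_LOCK	0x02	/* writeback in progress, waiters must sleep */

struct fake_inode {
	unsigned int state;
	pthread_mutex_t lock;		/* stands in for inode_lock */
	pthread_cond_t unlocked;	/* stands in for inode->i_wait */
};

static struct fake_inode ino = {
	.state    = I_DIRTY,
	.lock     = PTHREAD_MUTEX_INITIALIZER,
	.unlocked = PTHREAD_COND_INITIALIZER,
};

/* Slow, blocking work that must never run with the lock held. */
static void do_writeback(void)
{
	usleep(100 * 1000);
}

/* Analog of __sync_one(): mark busy, drop the lock, write, wake waiters. */
static void *syncer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ino.lock);
	unsigned int dirty = ino.state & I_DIRTY;
	ino.state |= I_LOCK;		/* everyone else must now wait */
	ino.state &= ~I_DIRTY;
	pthread_mutex_unlock(&ino.lock);

	if (dirty)
		do_writeback();		/* blocking I/O, lock not held */

	pthread_mutex_lock(&ino.lock);
	ino.state &= ~I_LOCK;		/* requeueing would happen here */
	pthread_cond_broadcast(&ino.unlocked);	/* wake_up(&inode->i_wait) */
	pthread_mutex_unlock(&ino.lock);
	return NULL;
}

/* Analog of __wait_on_inode(): sleep until I_LOCK is clear. */
static void *waiter(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ino.lock);
	while (ino.state & I_LOCK)
		pthread_cond_wait(&ino.unlocked, &ino.lock);
	printf("saw I_LOCK clear, state=%#x\n", ino.state);
	pthread_mutex_unlock(&ino.lock);
	return NULL;
}

int main(void)
{
	pthread_t s, w;

	pthread_create(&s, NULL, syncer, NULL);
	pthread_create(&w, NULL, waiter, NULL);
	pthread_join(s, NULL);
	pthread_join(w, NULL);
	return 0;
}

The point of the pattern, here as in the patch, is that the slow writeback runs with no lock held at all, while waiters only ever test and sleep on the state under the lock, so a dirty inode is never picked up twice and nothing sleeps while holding inode_lock. (Build the sketch with something like cc -pthread.)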