Patch from Manfred Spraul <manfred@colorfullife.com>

Fixes the lock contention on file_list_lock by removing the unneeded
anon_list, so filp allocation no longer needs to take a global lock.
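
The detail that makes this work: a freshly allocated filp is no longer put
on any list at all - memset() leaves f->f_list.next zeroed, and file_move(),
file_kill() and the fput paths take a NULL f_list.next to mean "not on a
list" before calling list_del().  A minimal userspace sketch of that
convention (toy list type and helper names, nothing below is from
<linux/list.h>):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

struct node { struct node *next, *prev; };

static struct node sb_list = { &sb_list, &sb_list };	/* circular list head */

static void node_add(struct node *n, struct node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void node_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

/* file_kill()-style: unlink only if we were ever linked */
static void node_kill(struct node *n)
{
	if (n->next)
		node_del(n);
	n->next = NULL;
}

int main(void)
{
	struct node f = { NULL, NULL };	/* fresh filp: memset() leaves next NULL */

	node_kill(&f);			/* never linked - nothing to unlink */
	node_add(&f, &sb_list);		/* file_move() onto a per-sb list */
	node_kill(&f);			/* linked - unlink and mark unlinked */
	assert(sb_list.next == &sb_list);
	printf("sentinel convention holds\n");
	return 0;
}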

Using a dedicated spinlock to protect files_stat.nr_files is a bit awkward,
but the alternative is a custom sysctl handler, which wouldn't be much more
efficient anyway.
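
In userspace terms the counting scheme looks roughly like the sketch below -
a pthread mutex stands in for filp_count_lock (the real ctor/dtor take it
with spin_lock_irqsave() because slab callbacks can run in any context), the
slab's page-granular ctor/dtor calls are collapsed into per-object calls,
and every name (obj_alloc, nr_objs, ...) is invented for illustration.  Note
that, as in get_empty_filp(), the limit check is done without the lock, so
the count may briefly overshoot the maximum:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t count_lock = PTHREAD_MUTEX_INITIALIZER;
static long nr_objs;			/* analogue of files_stat.nr_files */
static const long max_objs = 1024;	/* analogue of files_stat.max_files */

/* only the ctor/dtor touch the count, so only they need the lock */
static void obj_ctor(void)
{
	pthread_mutex_lock(&count_lock);
	nr_objs++;
	pthread_mutex_unlock(&count_lock);
}

static void obj_dtor(void)
{
	pthread_mutex_lock(&count_lock);
	nr_objs--;
	pthread_mutex_unlock(&count_lock);
}

static void *obj_alloc(void)
{
	/* unlocked read on the fast path, as in get_empty_filp() */
	if (nr_objs >= max_objs)
		return NULL;
	void *p = malloc(64);
	if (p)
		obj_ctor();	/* the slab would call this for us */
	return p;
}

static void obj_free(void *p)
{
	if (p) {
		obj_dtor();	/* ...and this, when the page is freed */
		free(p);
	}
}

int main(void)
{
	void *o = obj_alloc();
	printf("nr_objs after alloc: %ld\n", nr_objs);
	obj_free(o);
	printf("nr_objs after free:  %ld\n", nr_objs);
	return 0;
}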



 fs/dcache.c          |    3 -
 fs/file_table.c      |  132 ++++++++++++++++++++++++++++++---------------------
 include/linux/file.h |    3 +
 3 files changed, 85 insertions(+), 53 deletions(-)

diff -puN fs/dcache.c~file_list_lock-contention-fix fs/dcache.c
--- 25/fs/dcache.c~file_list_lock-contention-fix	2003-03-17 21:32:04.000000000 -0800
+++ 25-akpm/fs/dcache.c	2003-03-17 21:32:04.000000000 -0800
@@ -24,6 +24,7 @@
 #include <linux/cache.h>
 #include <linux/module.h>
 #include <linux/mount.h>
+#include <linux/file.h>
 #include <asm/uaccess.h>
 #include <linux/security.h>
 
@@ -1574,7 +1575,7 @@ void __init vfs_caches_init(unsigned lon
 
 	filp_cachep = kmem_cache_create("filp", 
 			sizeof(struct file), 0,
-			SLAB_HWCACHE_ALIGN, NULL, NULL);
+			SLAB_HWCACHE_ALIGN, filp_ctor, filp_dtor);
 	if(!filp_cachep)
 		panic("Cannot create filp SLAB cache");
 
diff -puN fs/file_table.c~file_list_lock-contention-fix fs/file_table.c
--- 25/fs/file_table.c~file_list_lock-contention-fix	2003-03-17 21:32:04.000000000 -0800
+++ 25-akpm/fs/file_table.c	2003-03-17 21:32:04.000000000 -0800
@@ -22,12 +22,45 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-/* Here the new files go */
-static LIST_HEAD(anon_list);
-/* And here the free ones sit */
+/* list of free filps for root */
 static LIST_HEAD(free_list);
 /* public *and* exported. Not pretty! */
-spinlock_t files_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+spinlock_t __cacheline_aligned_in_smp files_lock = SPIN_LOCK_UNLOCKED;
+
+static spinlock_t filp_count_lock = SPIN_LOCK_UNLOCKED;
+
+/* slab constructors and destructors are called from arbitrary
+ * context and must be fully threaded - use a local spinlock
+ * to protect files_stat.nr_files
+ */
+void filp_ctor(void * objp, struct kmem_cache_s *cachep, unsigned long cflags)
+{
+	if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		unsigned long flags;
+		spin_lock_irqsave(&filp_count_lock, flags);
+		files_stat.nr_files++;
+		spin_unlock_irqrestore(&filp_count_lock, flags);
+	}
+}
+
+void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&filp_count_lock, flags);
+	files_stat.nr_files--;
+	spin_unlock_irqrestore(&filp_count_lock, flags);
+}
+
+static inline void file_free(struct file* f)
+{
+	if (files_stat.nr_free_files <= NR_RESERVED_FILES) {
+		list_add(&f->f_list, &free_list);
+		files_stat.nr_free_files++;
+	} else {
+		kmem_cache_free(filp_cachep, f);
+	}
+}
 
 /* Find an unused file structure and return a pointer to it.
  * Returns NULL, if there are no more free file structures or
@@ -37,57 +70,48 @@ spinlock_t files_lock __cacheline_aligne
  */
 struct file * get_empty_filp(void)
 {
-	static int old_max = 0;
+static int old_max = 0;
 	struct file * f;
 
+	if (likely(files_stat.nr_files < files_stat.max_files)) {
+		f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL);
+		if (f) {
+got_one:
+			memset(f, 0, sizeof(*f));
+			if (security_file_alloc(f)) {
+				file_list_lock();
+				file_free(f);
+				file_list_unlock();
+
+				return NULL;
+			}
+			eventpoll_init_file(f);
+			atomic_set(&f->f_count,1);
+			f->f_uid = current->fsuid;
+			f->f_gid = current->fsgid;
+			f->f_owner.lock = RW_LOCK_UNLOCKED;
+			/* f->f_version, f->f_list.next: 0 */
+			return f;
+		}
+	}
+	/* Use a reserved one if we're the superuser */
 	file_list_lock();
-	if (files_stat.nr_free_files > NR_RESERVED_FILES) {
-	used_one:
+	if (files_stat.nr_free_files && !current->euid) {
 		f = list_entry(free_list.next, struct file, f_list);
 		list_del(&f->f_list);
 		files_stat.nr_free_files--;
-	new_one:
-		memset(f, 0, sizeof(*f));
-		if (security_file_alloc(f)) {
-			list_add(&f->f_list, &free_list);
-			files_stat.nr_free_files++;
-			file_list_unlock();
-			return NULL;
-		}
-		eventpoll_init_file(f);
-		atomic_set(&f->f_count,1);
-		f->f_version = 0;
-		f->f_uid = current->fsuid;
-		f->f_gid = current->fsgid;
-		f->f_owner.lock = RW_LOCK_UNLOCKED;
-		list_add(&f->f_list, &anon_list);
 		file_list_unlock();
-		return f;
+		goto got_one;
 	}
-	/*
-	 * Use a reserved one if we're the superuser
-	 */
-	if (files_stat.nr_free_files && !current->euid)
-		goto used_one;
-	/*
-	 * Allocate a new one if we're below the limit.
-	 */
-	if (files_stat.nr_files < files_stat.max_files) {
-		file_list_unlock();
-		f = kmem_cache_alloc(filp_cachep, SLAB_KERNEL);
-		file_list_lock();
-		if (f) {
-			files_stat.nr_files++;
-			goto new_one;
-		}
-		/* Big problems... */
-		printk(KERN_WARNING "VFS: filp allocation failed\n");
-
-	} else if (files_stat.max_files > old_max) {
+	file_list_unlock();
+	/* Ran out of filps - report that */
+	if (files_stat.max_files > old_max) {
 		printk(KERN_INFO "VFS: file-max limit %d reached\n", files_stat.max_files);
 		old_max = files_stat.max_files;
+	} else {
+		/* Big problems... */
+		printk(KERN_WARNING "VFS: filp allocation failed\n");
 	}
-	file_list_unlock();
 	return NULL;
 }
 
@@ -108,6 +132,7 @@ int open_private_file(struct file *filp,
 	filp->f_uid    = current->fsuid;
 	filp->f_gid    = current->fsgid;
 	filp->f_op     = dentry->d_inode->i_fop;
+	filp->f_list.next = NULL;
 	error = security_file_alloc(filp);
 	if (!error)
 		if (filp->f_op && filp->f_op->open) {
@@ -162,9 +187,9 @@ void __fput(struct file * file)
 	file_list_lock();
 	file->f_dentry = NULL;
 	file->f_vfsmnt = NULL;
-	list_del(&file->f_list);
-	list_add(&file->f_list, &free_list);
-	files_stat.nr_free_files++;
+	if (file->f_list.next)
+		list_del(&file->f_list);
+	file_free(file);
 	file_list_unlock();
 	dput(dentry);
 	mntput(mnt);
@@ -190,9 +215,9 @@ void put_filp(struct file *file)
 	if(atomic_dec_and_test(&file->f_count)) {
 		security_file_free(file);
 		file_list_lock();
-		list_del(&file->f_list);
-		list_add(&file->f_list, &free_list);
-		files_stat.nr_free_files++;
+		if (file->f_list.next)
+			list_del(&file->f_list);
+		file_free(file);
 		file_list_unlock();
 	}
 }
@@ -202,7 +227,8 @@ void file_move(struct file *file, struct
 	if (!list)
 		return;
 	file_list_lock();
-	list_del(&file->f_list);
+	if (file->f_list.next)
+		list_del(&file->f_list);
 	list_add(&file->f_list, list);
 	file_list_unlock();
 }
@@ -210,7 +236,9 @@ void file_move(struct file *file, struct
 void file_kill(struct file *file)
 {
 	file_list_lock();
-	list_del_init(&file->f_list);
+	if (file->f_list.next)
+		list_del(&file->f_list);
+	file->f_list.next = NULL;
 	file_list_unlock();
 }
 
diff -puN include/linux/file.h~file_list_lock-contention-fix include/linux/file.h
--- 25/include/linux/file.h~file_list_lock-contention-fix	2003-03-17 21:32:04.000000000 -0800
+++ 25-akpm/include/linux/file.h	2003-03-17 21:32:04.000000000 -0800
@@ -40,6 +40,9 @@ extern void FASTCALL(set_close_on_exec(u
 extern void put_filp(struct file *);
 extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
+struct kmem_cache_s;
+extern void filp_ctor(void * objp, struct kmem_cache_s *cachep, unsigned long cflags);
+extern void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags);
 
 extern struct file ** alloc_fd_array(int);
 extern int expand_fd_array(struct files_struct *, int nr);

_