From: Manfred Spraul <manfred@colorfullife.com>

Here are the updates to patch-mq-03-core:

It fixes the bug with the notification descriptors and adds more
documentation.  What's still missing is a description of the change to
signal.c.



---

 25-akpm/init/Kconfig |    1 
 25-akpm/ipc/mqueue.c |  141 +++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 105 insertions(+), 37 deletions(-)

diff -puN init/Kconfig~mq-03-core-update init/Kconfig
--- 25/init/Kconfig~mq-03-core-update	2004-03-24 02:51:57.661129048 -0800
+++ 25-akpm/init/Kconfig	2004-03-24 02:51:57.664128592 -0800
@@ -92,6 +92,7 @@ config SYSVIPC
 
 config POSIX_MQUEUE
 	bool "POSIX Message Queues"
+	depends on EXPERIMENTAL
 	---help---
 	  POSIX variant of message queues is a part of IPC. In POSIX message
 	  queues every message has a priority which decides about succession
diff -puN ipc/mqueue.c~mq-03-core-update ipc/mqueue.c
--- 25/ipc/mqueue.c~mq-03-core-update	2004-03-24 02:51:57.662128896 -0800
+++ 25-akpm/ipc/mqueue.c	2004-03-24 02:51:57.667128136 -0800
@@ -105,7 +105,8 @@ static struct inode *mqueue_get_inode(st
 		inode->i_gid = current->fsgid;
 		inode->i_blksize = PAGE_CACHE_SIZE;
 		inode->i_blocks = 0;
-		inode->i_mtime = inode->i_ctime = inode->i_atime = CURRENT_TIME;
+		inode->i_mtime = inode->i_ctime = inode->i_atime =
+				CURRENT_TIME;
 
 		if (S_ISREG(mode)) {
 			struct mqueue_inode_info *info;
@@ -166,7 +167,7 @@ static void init_once(void *foo, kmem_ca
 	struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo;
 
 	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
-	    SLAB_CTOR_CONSTRUCTOR)
+		SLAB_CTOR_CONSTRUCTOR)
 		inode_init_once(&p->vfs_inode);
 }
 
@@ -174,7 +175,7 @@ static struct inode *mqueue_alloc_inode(
 {
 	struct mqueue_inode_info *ei;
 
-	ei = (struct mqueue_inode_info *)kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
+	ei = kmem_cache_alloc(mqueue_inode_cachep, SLAB_KERNEL);
 	if (!ei)
 		return NULL;
 	return &ei->vfs_inode;
@@ -208,7 +209,8 @@ static void mqueue_delete_inode(struct i
 	spin_unlock(&mq_lock);
 }
 
-static int mqueue_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd)
+static int mqueue_create(struct inode *dir, struct dentry *dentry,
+				int mode, struct nameidata *nd)
 {
 	struct inode *inode;
 	int error;
@@ -272,7 +274,7 @@ static void wq_add(struct mqueue_inode_i
  * sr: SEND or RECV
  */
 static int wq_sleep(struct mqueue_inode_info *info, int sr,
-		    long timeout, struct ext_wait_queue *ewp)
+			long timeout, struct ext_wait_queue *ewp)
 {
 	int retval;
 	signed long time;
@@ -305,7 +307,6 @@ static int wq_sleep(struct mqueue_inode_
 			retval = -ETIMEDOUT;
 			break;
 		}
-		printk(KERN_WARNING "mqueue: Spurious wakeup in wq_sleep()\n");
 	}
 	list_del(&ewp->list);
 out_unlock:
@@ -314,7 +315,9 @@ out:
 	return retval;
 }
 
-/* Returns waiting task that should be serviced first or NULL if none exists */
+/*
+ * Returns waiting task that should be serviced first or NULL if none exists
+ */
 static struct ext_wait_queue *wq_get_first_waiter(
 		struct mqueue_inode_info *info, int sr)
 {
@@ -386,7 +389,8 @@ static long prepare_timeout(const struct
 	long timeout;
 
 	if (u_arg) {
-		if (unlikely(copy_from_user(&ts, u_arg, sizeof(struct timespec))))
+		if (unlikely(copy_from_user(&ts, u_arg,
+					sizeof(struct timespec))))
 			return -EFAULT;
 
 		if (unlikely(ts.tv_nsec < 0 || ts.tv_sec < 0
@@ -410,7 +414,26 @@ static long prepare_timeout(const struct
 	return timeout;
 }
 
-static unsigned int mqueue_notify_poll(struct file *filp, struct poll_table_struct *poll_tab)
+/*
+ * File descriptor based notification, intended to be used to implement
+ * SIGEV_THREAD:
+ * SIGEV_THREAD means that a notification function should be called in the
+ * context of a new thread. The kernel can't do that. Therefore mq_notify
+ * calls with SIGEV_THREAD return a new file descriptor. A user space helper
+ * must create a new thread and then read from the given file descriptor.
+ * The read always returns one byte. If it's NOTIFY_WOKENUP, then it must
+ * call the notification function. If it's NOTIFY_REMOVED, then the
+ * notification was removed. The file descriptor supports poll, thus one
+ * supervisor thread can manage multiple message queue notifications.
+ *
+ * The implementation must support multiple outstanding notifications:
+ * It's possible that a new notification is added and signaled before user
+ * space calls mqueue_notify_read for the previous notification.
+ * Therefore the notification state is stored in the private_data field of
+ * the file descriptor.
+ */
+static unsigned int mqueue_notify_poll(struct file *filp,
+					struct poll_table_struct *poll_tab)
 {
 	struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
 	int retval;
@@ -424,7 +447,8 @@ static unsigned int mqueue_notify_poll(s
 	return retval;
 }
 
-static ssize_t mqueue_notify_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
+static ssize_t mqueue_notify_read(struct file *filp, char __user *buf,
+					size_t count, loff_t *ppos)
 {
 	struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
 	char result;
@@ -454,6 +478,19 @@ static ssize_t mqueue_notify_read(struct
 	return 1;
 }
 
+static int mqueue_notify_release(struct inode *inode, struct file *filp)
+{	/* .release handler of mqueue_notify_fops */
+	struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
+
+	spin_lock(&info->lock);
+	if (info->notify_owner && info->notify_filp == filp)
+		info->notify_owner = 0;	/* this file held the registration */
+	filp->private_data = NP_REMOVED;	/* per-file notify state */
+	spin_unlock(&info->lock);
+
+	return 0;	/* always reports success */
+}
+
 static void remove_notification(struct mqueue_inode_info *info)
 {
 	if (info->notify.sigev_notify == SIGEV_THREAD) {
@@ -467,7 +504,7 @@ static void remove_notification(struct m
  * Invoked when creating a new queue via sys_mq_open
  */
 static struct file *do_create(struct dentry *dir, struct dentry *dentry,
-	     int oflag, mode_t mode, struct mq_attr __user *u_attr)
+			int oflag, mode_t mode, struct mq_attr __user *u_attr)
 {
 	struct file *filp;
 	struct inode *inode;
@@ -486,7 +523,8 @@ static struct file *do_create(struct den
 			if (attr.mq_maxmsg > HARD_MSGMAX)
 				return ERR_PTR(-EINVAL);
 		} else {
-			if (attr.mq_maxmsg > msg_max || attr.mq_msgsize > msgsize_max)
+			if (attr.mq_maxmsg > msg_max ||
+					attr.mq_msgsize > msgsize_max)
 				return ERR_PTR(-EINVAL);
 		}
 	} else {
@@ -520,8 +558,9 @@ static struct file *do_create(struct den
 /* Opens existing queue */
 static struct file *do_open(struct dentry *dentry, int oflag)
 {
+static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
+					MAY_READ | MAY_WRITE };
 	struct file *filp;
-	static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE };
 
 	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
 		return ERR_PTR(-EINVAL);
@@ -538,12 +577,12 @@ static struct file *do_open(struct dentr
 }
 
 asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
-	struct mq_attr __user *u_attr)
+				struct mq_attr __user *u_attr)
 {
 	struct dentry *dentry;
 	struct file *filp;
-	char   *name;
-	int    fd, error;
+	char *name;
+	int fd, error;
 
 	if (IS_ERR(name = getname(u_name)))
 		return PTR_ERR(name);
@@ -562,12 +601,15 @@ asmlinkage long sys_mq_open(const char _
 
 	if (oflag & O_CREAT) {
 		if (dentry->d_inode) {	/* entry already exists */
-			filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) : do_open(dentry, oflag);
+			filp = (oflag & O_EXCL) ? ERR_PTR(-EEXIST) :
+					do_open(dentry, oflag);
 		} else {
-			filp = do_create(mqueue_mnt->mnt_root, dentry, oflag, mode, u_attr);
+			filp = do_create(mqueue_mnt->mnt_root, dentry,
+						oflag, mode, u_attr);
 		}
 	} else
-		filp = (dentry->d_inode) ? do_open(dentry, oflag) : ERR_PTR(-ENOENT);
+		filp = (dentry->d_inode) ? do_open(dentry, oflag) :
+					ERR_PTR(-ENOENT);
 
 	dput(dentry);
 
@@ -635,8 +677,20 @@ out_unlock:
 	return err;
 }
 
-/* Pipelined send and receive functions. Do not confuse this with SysV message
- * queues terminology. It is little bit different. */
+/* Pipelined send and receive functions.
+ *
+ * If a receiver finds no waiting message, then it registers itself in the
+ * list of waiting receivers. A sender checks that list before adding the new
+ * message into the message array. If there is a waiting receiver, then it
+ * bypasses the message array and directly hands the message over to the
+ * receiver.
+ * The receiver accepts the message and returns without grabbing the queue
+ * spinlock. Therefore an intermediate STATE_PENDING state and memory barriers
+ * are necessary. The same algorithm is used for sysv semaphores, see
+ * ipc/sem.c for more details.
+ *
+ * The same algorithm is used for senders.
+ */
 
 /* pipelined_send() - send a message directly to the task waiting in
  * sys_mq_timedreceive() (without inserting message into a queue). */
@@ -670,7 +724,8 @@ static inline void pipelined_receive(str
 }
 
 asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
-	size_t msg_len, unsigned int msg_prio, const struct timespec __user *u_abs_timeout)
+	size_t msg_len, unsigned int msg_prio,
+	const struct timespec __user *u_abs_timeout)
 {
 	struct file *filp;
 	struct inode *inode;
@@ -678,7 +733,7 @@ asmlinkage long sys_mq_timedsend(mqd_t m
 	struct ext_wait_queue *receiver;
 	struct msg_msg *msg_ptr;
 	struct mqueue_inode_info *info;
- 	long timeout;
+	long timeout;
 	int ret;
 
 	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
@@ -738,7 +793,8 @@ asmlinkage long sys_mq_timedsend(mqd_t m
 			msg_insert(msg_ptr, info);
 			__do_notify(info);
 		}
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_atime = inode->i_mtime = inode->i_ctime =
+				CURRENT_TIME;
 		spin_unlock(&info->lock);
 		ret = 0;
 	}
@@ -798,7 +854,8 @@ asmlinkage ssize_t sys_mq_timedreceive(m
 	} else {
 		msg_ptr = msg_get(info);
 
-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_atime = inode->i_mtime = inode->i_ctime =
+				CURRENT_TIME;
 
 		/* There is now free space in queue. */
 		pipelined_receive(info);
@@ -820,11 +877,13 @@ out:
 	return ret;
 }
 
-/* Notes: the case when user wants us to deregister (with NULL as pointer or SIGEV_NONE)
- * and he isn't currently owner of notification will be silently discarded.
- * It isn't explicitly defined in the POSIX.
+/*
+ * Note: a request to deregister (NULL pointer or SIGEV_NONE) from a caller
+ * that is not the current owner of the notification is silently ignored.
+ * POSIX does not explicitly define this case.
  */
-asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *u_notification)
+asmlinkage long sys_mq_notify(mqd_t mqdes,
+				const struct sigevent __user *u_notification)
 {
 	int ret, fd;
 	struct file *filp, *nfilp;
@@ -835,7 +894,8 @@ asmlinkage long sys_mq_notify(mqd_t mqde
 	if (u_notification == NULL) {
 		notification.sigev_notify = SIGEV_NONE;
 	} else {
-		if (copy_from_user(&notification, u_notification, sizeof(struct sigevent)))
+		if (copy_from_user(&notification, u_notification,
+					sizeof(struct sigevent)))
 			return -EFAULT;
 
 		if (unlikely(notification.sigev_notify != SIGEV_NONE &&
@@ -843,7 +903,8 @@ asmlinkage long sys_mq_notify(mqd_t mqde
 			     notification.sigev_notify != SIGEV_THREAD))
 			return -EINVAL;
 		if (notification.sigev_notify == SIGEV_SIGNAL &&
-			(notification.sigev_signo < 0 || notification.sigev_signo > _NSIG)) {
+			(notification.sigev_signo < 0 ||
+			 notification.sigev_signo > _NSIG)) {
 			return -EINVAL;
 		}
 	}
@@ -915,8 +976,9 @@ out:
 	return ret;
 }
 
-asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *u_mqstat,
-	struct mq_attr __user *u_omqstat)
+asmlinkage long sys_mq_getsetattr(mqd_t mqdes,
+			const struct mq_attr __user *u_mqstat,
+			struct mq_attr __user *u_omqstat)
 {
 	int ret;
 	struct mq_attr mqstat, omqstat;
@@ -955,7 +1017,8 @@ asmlinkage long sys_mq_getsetattr(mqd_t 
 	spin_unlock(&info->lock);
 
 	ret = 0;
-	if (u_omqstat != NULL && copy_to_user(u_omqstat, &omqstat, sizeof(struct mq_attr)))
+	if (u_omqstat != NULL && copy_to_user(u_omqstat, &omqstat,
+						sizeof(struct mq_attr)))
 		ret = -EFAULT;
 
 out_fput:
@@ -977,6 +1040,7 @@ static struct file_operations mqueue_fil
 static struct file_operations mqueue_notify_fops = {
 	.poll = mqueue_notify_poll,
 	.read = mqueue_notify_read,
+	.release = mqueue_notify_release,
 };
 
 
@@ -1056,7 +1120,8 @@ static int __init init_mqueue_fs(void)
 	int error;
 
 	mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
-		sizeof(struct mqueue_inode_info), 0, SLAB_HWCACHE_ALIGN, init_once, NULL);
+				sizeof(struct mqueue_inode_info), 0,
+				SLAB_HWCACHE_ALIGN, init_once, NULL);
 	if (mqueue_inode_cachep == NULL)
 		return -ENOMEM;
 
@@ -1086,8 +1151,10 @@ out_filesystem:
 out_sysctl:
 	unregister_sysctl_table(mq_sysctl_table);
 out_cache:
-	if (kmem_cache_destroy(mqueue_inode_cachep))
-		printk(KERN_INFO "mqueue_inode_cache: not all structures were freed\n");
+	if (kmem_cache_destroy(mqueue_inode_cachep)) {
+		printk(KERN_INFO
+			"mqueue_inode_cache: not all structures were freed\n");
+	}
 	return error;
 }
 

_