 fs/exec.c                  |    9 ++
 fs/namei.c                 |    2 
 fs/open.c                  |   29 ++++-
 fs/proc/array.c            |    6 +
 fs/xattr.c                 |    3 -
 include/linux/binfmts.h    |    1 
 include/linux/capability.h |   56 ++++++++-
 include/linux/securebits.h |    4 +
 include/linux/security.h   |   10 +-
 kernel/capability.c        |   48 ++++++--
 kernel/fork.c              |    2 
 kernel/ptrace.c            |    2 
 security/Kconfig           |   10 ++
 security/capability.c      |    4 +
 security/commoncap.c       |  260 ++++++++++++++++++++++++++++++++++++++------
 security/dummy.c           |    6 +
 16 files changed, 379 insertions(+), 73 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 54135df..1a7ff92 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -925,10 +925,16 @@ int prepare_binprm(struct linux_binprm *
 
 	bprm->e_uid = current->euid;
 	bprm->e_gid = current->egid;
+	bprm->is_suid = 0;
+	bprm->is_sgid = 0;
 
 	if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
+		if (!capable(CAP_REG_SXID))
+			return -EPERM;
+
 		/* Set-uid? */
 		if (mode & S_ISUID) {
+			bprm->is_suid = 1;
 			current->personality &= ~PER_CLEAR_ON_SETID;
 			bprm->e_uid = inode->i_uid;
 		}
@@ -940,6 +946,7 @@ int prepare_binprm(struct linux_binprm *
 		 * executable.
 		 */
 		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+			bprm->is_sgid = 1;
 			current->personality &= ~PER_CLEAR_ON_SETID;
 			bprm->e_gid = inode->i_gid;
 		}
@@ -1133,6 +1140,8 @@ int do_execve(char * filename,
 	int retval;
 	int i;
 
+	if (!capable(CAP_REG_EXEC))
+		return -EPERM;
 	retval = -ENOMEM;
 	bprm = kzalloc(sizeof(*bprm), GFP_KERNEL);
 	if (!bprm)
diff --git a/fs/namei.c b/fs/namei.c
index 432d6bc..69a3bae 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -242,7 +242,7 @@ int permission(struct inode *inode, int 
 		/*
 		 * Nobody gets write access to an immutable file.
 		 */
-		if (IS_IMMUTABLE(inode))
+		if (IS_IMMUTABLE(inode) || !capable(CAP_REG_WRITE))
 			return -EACCES;
 	}
 
diff --git a/fs/open.c b/fs/open.c
index 303f06d..77a12ba 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -253,7 +253,7 @@ static long do_sys_truncate(const char _
 		goto dput_and_out;
 
 	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || !capable(CAP_REG_WRITE))
 		goto dput_and_out;
 
 	/*
@@ -382,6 +382,10 @@ asmlinkage long sys_utime(char __user * 
 	if (IS_RDONLY(inode))
 		goto dput_and_out;
 
+	error = -EPERM;
+	if (!capable(CAP_REG_WRITE))
+		goto dput_and_out;
+
 	/* Don't worry, the checks are done in inode_change_ok() */
 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 	if (times) {
@@ -439,6 +443,10 @@ long do_utimes(int dfd, char __user *fil
 	if (IS_RDONLY(inode))
 		goto dput_and_out;
 
+	error = -EPERM;
+	if (!capable(CAP_REG_WRITE))
+		goto dput_and_out;
+
 	/* Don't worry, the checks are done in inode_change_ok() */
 	newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
 	if (times) {
@@ -515,7 +523,8 @@ asmlinkage long sys_faccessat(int dfd, c
 	 * but we cannot because user_path_walk can sleep.
 	 */
 	if (current->uid)
-		cap_clear(current->cap_effective);
+		current->cap_effective = cap_intersect(current->cap_effective,
+						       CAP_REGULAR_SET);
 	else
 		current->cap_effective = current->cap_permitted;
 
@@ -639,7 +648,7 @@ asmlinkage long sys_fchmod(unsigned int 
 	if (IS_RDONLY(inode))
 		goto out_putf;
 	err = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || !capable(CAP_REG_WRITE))
 		goto out_putf;
 	mutex_lock(&inode->i_mutex);
 	if (mode == (mode_t) -1)
@@ -673,7 +682,7 @@ asmlinkage long sys_fchmodat(int dfd, co
 		goto dput_and_out;
 
 	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || !capable(CAP_REG_WRITE))
 		goto dput_and_out;
 
 	mutex_lock(&inode->i_mutex);
@@ -710,7 +719,7 @@ static int chown_common(struct dentry * 
 	if (IS_RDONLY(inode))
 		goto out;
 	error = -EPERM;
-	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+	if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || !capable(CAP_REG_WRITE))
 		goto out;
 	newattrs.ia_valid =  ATTR_CTIME;
 	if (user != (uid_t) -1) {
@@ -1104,7 +1113,10 @@ asmlinkage long sys_open(const char __us
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
 
-	ret = do_sys_open(AT_FDCWD, filename, flags, mode);
+	if (capable(CAP_REG_OPEN))
+		ret = do_sys_open(AT_FDCWD, filename, flags, mode);
+	else
+		ret = -EPERM;
 	/* avoid REGPARM breakage on x86: */
 	prevent_tail_call(ret);
 	return ret;
@@ -1119,7 +1131,10 @@ asmlinkage long sys_openat(int dfd, cons
 	if (force_o_largefile())
 		flags |= O_LARGEFILE;
 
-	ret = do_sys_open(dfd, filename, flags, mode);
+	if (capable(CAP_REG_OPEN))
+		ret = do_sys_open(dfd, filename, flags, mode);
+	else
+		ret = -EPERM;
 	/* avoid REGPARM breakage on x86: */
 	prevent_tail_call(ret);
 	return ret;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 0b615d6..6724fc2 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -285,9 +285,9 @@ static inline char * task_sig(struct tas
 
 static inline char *task_cap(struct task_struct *p, char *buffer)
 {
-    return buffer + sprintf(buffer, "CapInh:\t%016x\n"
-			    "CapPrm:\t%016x\n"
-			    "CapEff:\t%016x\n",
+    return buffer + sprintf(buffer, "CapInh:\t%016llx\n"
+			    "CapPrm:\t%016llx\n"
+			    "CapEff:\t%016llx\n",
 			    cap_t(p->cap_inheritable),
 			    cap_t(p->cap_permitted),
 			    cap_t(p->cap_effective));
diff --git a/fs/xattr.c b/fs/xattr.c
index c32f15b..33b70ce 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -35,7 +35,8 @@ xattr_permission(struct inode *inode, co
 	if (mask & MAY_WRITE) {
 		if (IS_RDONLY(inode))
 			return -EROFS;
-		if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+		if (IS_IMMUTABLE(inode) || IS_APPEND(inode)
+		    || !capable(CAP_REG_WRITE))
 			return -EPERM;
 	}
 
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c1e82c5..c7fb183 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -29,6 +29,7 @@ struct linux_binprm{
 	struct file * file;
 	int e_uid, e_gid;
 	kernel_cap_t cap_inheritable, cap_permitted, cap_effective;
+	char is_suid, is_sgid;
 	void *security;
 	int argc, envc;
 	char * filename;	/* Name of binary as seen by procps */
diff --git a/include/linux/capability.h b/include/linux/capability.h
index 6548b35..428ccc5 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -27,7 +27,8 @@ #include <linux/compiler.h>
    library since the draft standard requires the use of malloc/free
    etc.. */
  
-#define _LINUX_CAPABILITY_VERSION  0x19980330
+#define _LINUX_CAPABILITY_VERSION  0x20060903
+#define _LINUX_CAPABILITY_OLD_VERSION  0x19980330
 
 typedef struct __user_cap_header_struct {
 	__u32 version;
@@ -35,10 +36,16 @@ typedef struct __user_cap_header_struct 
 } __user *cap_user_header_t;
  
 typedef struct __user_cap_data_struct {
+        __u64 effective;
+        __u64 permitted;
+        __u64 inheritable;
+} __user *cap_user_data_t;
+ 
+typedef struct __user_cap_data_old_struct {
         __u32 effective;
         __u32 permitted;
         __u32 inheritable;
-} __user *cap_user_data_t;
+} __user *cap_user_data_old_t;
   
 #ifdef __KERNEL__
 
@@ -50,12 +57,12 @@ #include <asm/current.h>
 #ifdef STRICT_CAP_T_TYPECHECKS
 
 typedef struct kernel_cap_struct {
-	__u32 cap;
+	__u64 cap;
 } kernel_cap_t;
 
 #else
 
-typedef __u32 kernel_cap_t;
+typedef __u64 kernel_cap_t;
 
 #endif
   
@@ -288,6 +295,32 @@ #define CAP_AUDIT_WRITE      29
 
 #define CAP_AUDIT_CONTROL    30
 
+/* Number of low (=system, =additional) caps: bits 0..CAP_AUDIT_CONTROL */
+#define CAP_NUMCAPS_SYS	     (CAP_AUDIT_CONTROL + 1)
+
+
+/**
+ ** Regular capabilities (normally possessed by all processes).
+ **/
+
+/* Can fork() */
+#define CAP_REG_FORK         32
+
+/* Can open() */
+#define CAP_REG_OPEN         33
+
+/* Can exec() */
+#define CAP_REG_EXEC         34
+
+/* Might gain permissions on exec() */
+#define CAP_REG_SXID         35
+
+/* Perform write access to the filesystem */
+#define CAP_REG_WRITE        36
+
+/* Can use ptrace() */
+#define CAP_REG_PTRACE       37
+
 #ifdef __KERNEL__
 /* 
  * Bounding set
@@ -310,12 +343,13 @@ #define cap_t(x) (x)
 
 #endif
 
-#define CAP_EMPTY_SET       to_cap_t(0)
-#define CAP_FULL_SET        to_cap_t(~0)
-#define CAP_INIT_EFF_SET    to_cap_t(~0 & ~CAP_TO_MASK(CAP_SETPCAP))
-#define CAP_INIT_INH_SET    to_cap_t(0)
+#define CAP_EMPTY_SET       to_cap_t(0ULL)
+#define CAP_FULL_SET        to_cap_t(~0ULL)
+#define CAP_REGULAR_SET     to_cap_t(0x0000ffff00000000ULL)
+#define CAP_INIT_EFF_SET    to_cap_t(~0ULL)
+#define CAP_INIT_INH_SET    to_cap_t(~0ULL)
 
-#define CAP_TO_MASK(x) (1 << (x))
+#define CAP_TO_MASK(x) (1ULL << (x))
 #define cap_raise(c, flag)   (cap_t(c) |=  CAP_TO_MASK(flag))
 #define cap_lower(c, flag)   (cap_t(c) &= ~CAP_TO_MASK(flag))
 #define cap_raised(c, flag)  (cap_t(c) & CAP_TO_MASK(flag))
@@ -351,8 +385,8 @@ static inline kernel_cap_t cap_invert(ke
 #define cap_isclear(c)       (!cap_t(c))
 #define cap_issubset(a,set)  (!(cap_t(a) & ~cap_t(set)))
 
-#define cap_clear(c)         do { cap_t(c) =  0; } while(0)
-#define cap_set_full(c)      do { cap_t(c) = ~0; } while(0)
+#define cap_clear(c)         do { cap_t(c) =  0ULL; } while(0)
+#define cap_set_full(c)      do { cap_t(c) = ~0ULL; } while(0)
 #define cap_mask(c,mask)     do { cap_t(c) &= cap_t(mask); } while(0)
 
 #define cap_is_fs_cap(c)     (CAP_TO_MASK(c) & CAP_FS_MASK)
diff --git a/include/linux/securebits.h b/include/linux/securebits.h
index 5b06178..0092332 100644
--- a/include/linux/securebits.h
+++ b/include/linux/securebits.h
@@ -18,6 +18,10 @@ #define SECURE_NOROOT            0
    privileges. When unset, setuid doesn't change privileges. */
 #define SECURE_NO_SETUID_FIXUP   2
 
+/* When set, exec()ing a suid/sgid program does not force reinstate
+   all "regular" capabilities. */
+#define SECURE_NO_SXID_SANITIZE  4
+
 /* Each securesetting is implemented using two bits. One bit specify
    whether the setting is on or off. The other bit specify whether the
    setting is fixed or not. A setting which is fixed cannot be changed
diff --git a/include/linux/security.h b/include/linux/security.h
index 6bc2aad..265ab00 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -51,6 +51,10 @@ extern int cap_inode_setxattr(struct den
 extern int cap_inode_removexattr(struct dentry *dentry, char *name);
 extern int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags);
 extern void cap_task_reparent_to_init (struct task_struct *p);
+extern int cap_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid);
+extern int cap_task_setscheduler (struct task_struct *p, int policy, struct sched_param *lp);
+extern int cap_task_setioprio (struct task_struct *p, int ioprio);
+extern int cap_task_setnice (struct task_struct *p, int nice);
 extern int cap_syslog (int type);
 extern int cap_vm_enough_memory (long pages);
 
@@ -2544,12 +2548,12 @@ static inline int security_task_setgroup
 
 static inline int security_task_setnice (struct task_struct *p, int nice)
 {
-	return 0;
+	return cap_task_setnice(p, nice);
 }
 
 static inline int security_task_setioprio (struct task_struct *p, int ioprio)
 {
-	return 0;
+	return cap_task_setioprio(p, ioprio);
 }
 
 static inline int security_task_getioprio (struct task_struct *p)
@@ -2584,7 +2588,7 @@ static inline int security_task_kill (st
 				      struct siginfo *info, int sig,
 				      u32 secid)
 {
-	return 0;
+	return cap_task_kill(p, info, sig, secid);
 }
 
 static inline int security_task_wait (struct task_struct *p)
diff --git a/kernel/capability.c b/kernel/capability.c
index c7685ad..2bb802a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -15,7 +15,7 @@ #include <linux/syscalls.h>
 #include <asm/uaccess.h>
 
 unsigned securebits = SECUREBITS_DEFAULT; /* systemwide security settings */
-kernel_cap_t cap_bset = CAP_INIT_EFF_SET;
+kernel_cap_t cap_bset = CAP_INIT_INH_SET;
 
 EXPORT_SYMBOL(securebits);
 EXPORT_SYMBOL(cap_bset);
@@ -52,7 +52,8 @@ asmlinkage long sys_capget(cap_user_head
      if (get_user(version, &header->version))
 	     return -EFAULT;
 
-     if (version != _LINUX_CAPABILITY_VERSION) {
+     if (version != _LINUX_CAPABILITY_VERSION
+	 && version != _LINUX_CAPABILITY_OLD_VERSION) {
 	     if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
 		     return -EFAULT; 
              return -EINVAL;
@@ -82,8 +83,18 @@ out:
      read_unlock(&tasklist_lock); 
      spin_unlock(&task_capability_lock);
 
-     if (!ret && copy_to_user(dataptr, &data, sizeof data))
-          return -EFAULT; 
+     if (!ret) {
+	     if (version == _LINUX_CAPABILITY_OLD_VERSION) {
+		     /* Old ABI: hand back only the low 32 bits of each set. */
+		     struct __user_cap_data_old_struct data_old;
+		     data_old.effective = data.effective & 0xffffffffULL;
+		     data_old.permitted = data.permitted & 0xffffffffULL;
+		     data_old.inheritable = data.inheritable & 0xffffffffULL;
+		     if (copy_to_user(dataptr, &data_old, sizeof data_old))
+			     return -EFAULT;
+	     } else if (copy_to_user(dataptr, &data, sizeof data))
+		     return -EFAULT;
+     }
 
      return ret;
 }
@@ -179,7 +190,8 @@ asmlinkage long sys_capset(cap_user_head
      if (get_user(version, &header->version))
 	     return -EFAULT; 
 
-     if (version != _LINUX_CAPABILITY_VERSION) {
+     if (version != _LINUX_CAPABILITY_VERSION
+	 && version != _LINUX_CAPABILITY_OLD_VERSION) {
 	     if (put_user(_LINUX_CAPABILITY_VERSION, &header->version))
 		     return -EFAULT; 
              return -EINVAL;
@@ -191,10 +203,25 @@ asmlinkage long sys_capset(cap_user_head
      if (pid && pid != current->pid && !capable(CAP_SETPCAP))
              return -EPERM;
 
-     if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
-	 copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
-	 copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
-	     return -EFAULT; 
+     if (version == _LINUX_CAPABILITY_OLD_VERSION) {
+	     const cap_user_data_old_t data2 = (void *)data;
+	     __u32 w;
+	     /* Assume caller wants to keep all regular caps and clear
+	      * all unknown additional caps.  Is this right? */
+	     if (copy_from_user(&w, &data2->effective, sizeof(w)))
+		     return -EFAULT;
+	     effective = (__u64)w | CAP_REGULAR_SET;
+	     if (copy_from_user(&w, &data2->inheritable, sizeof(w)))
+		     return -EFAULT;
+	     inheritable = (__u64)w | CAP_REGULAR_SET;
+	     if (copy_from_user(&w, &data2->permitted, sizeof(w)))
+		     return -EFAULT;
+	     permitted = (__u64)w | CAP_REGULAR_SET;
+     } else
+	     if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
+		 copy_from_user(&inheritable, &data->inheritable, sizeof(inheritable)) ||
+		 copy_from_user(&permitted, &data->permitted, sizeof(permitted)))
+		     return -EFAULT; 
 
      spin_lock(&task_capability_lock);
      read_lock(&tasklist_lock);
@@ -237,7 +264,8 @@ out:
 int __capable(struct task_struct *t, int cap)
 {
 	if (security_capable(t, cap) == 0) {
-		t->flags |= PF_SUPERPRIV;
+		if (!cap_raised(CAP_REGULAR_SET, cap))
+			t->flags |= PF_SUPERPRIV;
 		return 1;
 	}
 	return 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index f9b014e..20f559f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1347,6 +1347,10 @@ long do_fork(unsigned long clone_flags,
-	struct pid *pid = alloc_pid();
+	struct pid *pid;
 	long nr;
 
+	/* Check before allocating so that a refusal cannot leak the pid. */
+	if (!capable(CAP_REG_FORK))
+		return -EPERM;
+	pid = alloc_pid();
 	if (!pid)
 		return -EAGAIN;
 	nr = pid->nr;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 9a111f7..093307d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -132,6 +132,8 @@ static int may_attach(struct task_struct
 	/* Don't let security modules deny introspection */
 	if (task == current)
 		return 0;
+	if (!capable(CAP_REG_PTRACE))
+		return -EPERM;
 	if (((current->uid != task->euid) ||
 	     (current->uid != task->suid) ||
 	     (current->uid != task->uid) ||
diff --git a/security/Kconfig b/security/Kconfig
index 67785df..ce2bac7 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -80,6 +80,16 @@ config SECURITY_CAPABILITIES
 	  This enables the "default" Linux capabilities functionality.
 	  If you are unsure how to answer this question, answer Y.
 
+config SECURITY_FS_CAPABILITIES
+	bool "Filesystem Capabilities"
+	depends on SECURITY_CAPABILITIES
+	default n
+	help
+	  This enables filesystem capabilities, allowing you to give
+	  binaries a subset of root's powers without using setuid 0.
+
+	  If in doubt, answer N.
+
 config SECURITY_ROOTPLUG
 	tristate "Root Plug Support"
 	depends on USB && SECURITY
diff --git a/security/capability.c b/security/capability.c
index b868e7e..14cb592 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -40,6 +40,10 @@ static struct security_operations capabi
 	.inode_setxattr =		cap_inode_setxattr,
 	.inode_removexattr =		cap_inode_removexattr,
 
+	.task_kill =			cap_task_kill,
+	.task_setscheduler =		cap_task_setscheduler,
+	.task_setioprio =		cap_task_setioprio,
+	.task_setnice =			cap_task_setnice,
 	.task_post_setuid =		cap_task_post_setuid,
 	.task_reparent_to_init =	cap_task_reparent_to_init,
 
diff --git a/security/commoncap.c b/security/commoncap.c
index f50fc29..1988efc 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -12,6 +12,7 @@ #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/security.h>
+#include <linux/securebits.h>
 #include <linux/file.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -23,6 +24,7 @@ #include <linux/netlink.h>
 #include <linux/ptrace.h>
 #include <linux/xattr.h>
 #include <linux/hugetlb.h>
+#include <linux/mount.h>
 
 int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
 {
@@ -97,6 +99,8 @@ int cap_capset_check (struct task_struct
 	if (!cap_issubset (*effective, *permitted)) {
 		return -EPERM;
 	}
+	/* we allow Inheritable not to be a subset of Permitted:
+	 * cap_capset_set will intersect them anyway */
 
 	return 0;
 }
@@ -105,48 +109,166 @@ void cap_capset_set (struct task_struct 
 		     kernel_cap_t *inheritable, kernel_cap_t *permitted)
 {
 	target->cap_effective = *effective;
-	target->cap_inheritable = *inheritable;
+	target->cap_inheritable = cap_intersect (*permitted, *inheritable);
 	target->cap_permitted = *permitted;
 }
 
+#define XATTR_CAPS_SUFFIX "capability"
+#define XATTR_NAME_CAPS XATTR_SECURITY_PREFIX XATTR_CAPS_SUFFIX
+struct vfs_cap_data_struct {
+	__u32 version;		/* format version of the stored data */
+	__u32 effective;	/* bitmask merged into bprm->cap_effective */
+	__u32 permitted;	/* bitmask merged into bprm->cap_permitted */
+	__u32 inheritable;	/* bitmask merged into bprm->cap_inheritable */
+};
+/* Despite the name: converts each field from little-endian to CPU order. */
+static inline void convert_to_le(struct vfs_cap_data_struct *cap)
+{
+	cap->version = le32_to_cpu(cap->version);
+	cap->effective = le32_to_cpu(cap->effective);
+	cap->permitted = le32_to_cpu(cap->permitted);
+	cap->inheritable = le32_to_cpu(cap->inheritable);
+}
+
+/*
+ * Validate capability data read from a file's extended attribute.
+ * The data must carry the 32-bit format version, and none of its three
+ * sets may name a bit above CAP_AUDIT_CONTROL, the highest system
+ * capability defined.
+ *
+ * Returns 0 if the data is acceptable, -EPERM otherwise.
+ */
+static int check_cap_sanity(struct vfs_cap_data_struct *cap)
+{
+	const __u32 all = cap->effective | cap->permitted | cap->inheritable;
+	unsigned int i;
+
+	if (cap->version != _LINUX_CAPABILITY_OLD_VERSION)
+		return -EPERM;
+
+	/* Scan bit positions; sizeof() alone would count bytes, not bits. */
+	for (i = CAP_AUDIT_CONTROL + 1; i < 8 * sizeof(all); i++)
+		if (all & CAP_TO_MASK(i))
+			return -EPERM;
+	return 0;
+}
+
 int cap_bprm_set_security (struct linux_binprm *bprm)
 {
+	struct dentry *dentry;
+	ssize_t rc;
+	struct vfs_cap_data_struct cap_struct;
+	struct inode *inode;
+
 	/* Copied from fs/exec.c:prepare_binprm. */
 
-	/* We don't have VFS support for capabilities yet */
-	cap_clear (bprm->cap_inheritable);
+	cap_set_full (bprm->cap_inheritable);
 	cap_clear (bprm->cap_permitted);
-	cap_clear (bprm->cap_effective);
+	cap_set_full (bprm->cap_effective);
+
+	/* Sanitize caps for all suid/sgid programs. */
+	if (!issecure (SECURE_NO_SXID_SANITIZE) && (bprm->is_suid
+						    || bprm->is_sgid)) {
+		/* Ensure that they get _at least_ regular caps. */
+		bprm->cap_permitted = CAP_REGULAR_SET;
+		if ((current->uid != 0 && current->euid != 0
+		     && current->suid != 0)
+		    || issecure (SECURE_NOROOT)) {
+			/* Ensure that they don't get _more_ caps when they
+			   might not expect it.  Note that dropping
+			   capabilities on change of ?uid from ==0 to !=0 will
+			   be handled by cap_task_post_setuid() called from
+			   cap_bprm_apply_creds() below.  Yuck!!!!!!  This is
+			   soooooo ugly! */
+			bprm->cap_inheritable = CAP_REGULAR_SET;
+			bprm->cap_effective = CAP_REGULAR_SET;
+		}
+	}
 
 	/*  To support inheritance of root-permissions and suid-root
 	 *  executables under compatibility mode, we raise all three
 	 *  capability sets for the file.
-	 *
-	 *  If only the real uid is 0, we only raise the inheritable
-	 *  and permitted sets of the executable file.
 	 */
-
 	if (!issecure (SECURE_NOROOT)) {
-		if (bprm->e_uid == 0 || current->uid == 0) {
+		if (bprm->is_suid && bprm->e_uid == 0) {
 			cap_set_full (bprm->cap_inheritable);
 			cap_set_full (bprm->cap_permitted);
-		}
-		if (bprm->e_uid == 0)
 			cap_set_full (bprm->cap_effective);
+		}
 	}
+
+#ifdef CONFIG_SECURITY_FS_CAPABILITIES
+	/* Locate any VFS capabilities stored in the executable's
+	 * XATTR_NAME_CAPS extended attribute and merge them into bprm. */
+	dentry = dget(bprm->file->f_dentry);
+	inode = dentry->d_inode;
+	if (!inode->i_op || !inode->i_op->getxattr) {
+		dput(dentry);
+		return 0;
+	}
+
+	rc = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, &cap_struct,
+						sizeof(cap_struct));
+	dput(dentry);
+
+	if (rc == -ENODATA || rc == -EOPNOTSUPP)
+		return 0;	/* no capability data available for this file */
+
+	if (rc < 0) {
+		printk(KERN_NOTICE "%s: Error (%ld) getting xattr\n",
+				__FUNCTION__, (long int)rc);
+		return rc;
+	}
+
+	if (rc != sizeof(cap_struct)) {
+		printk(KERN_NOTICE "%s: got wrong size for getxattr (%ld)\n",
+					__FUNCTION__, (long int)rc);
+		return -EPERM;
+	}
+	/* Fix up the byte order of what was read, then validate it. */
+	convert_to_le(&cap_struct);
+	if (check_cap_sanity(&cap_struct))
+		return -EPERM;
+
+	bprm->cap_effective = cap_combine (cap_intersect (bprm->cap_effective,
+							  CAP_REGULAR_SET),
+					   to_cap_t(cap_struct.effective));
+	bprm->cap_permitted = cap_combine (cap_intersect (bprm->cap_permitted,
+							  CAP_REGULAR_SET),
+					   to_cap_t(cap_struct.permitted));
+	if ((bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)
+	    || !capable(CAP_REG_SXID)) /* Don't allow to gain privileges! */
+		cap_clear (bprm->cap_permitted);
+	bprm->cap_inheritable = cap_combine (cap_intersect (bprm->cap_inheritable,
+							    CAP_REGULAR_SET),
+					     to_cap_t(cap_struct.inheritable));
+
+#endif
 	return 0;
 }
 
 void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
 {
 	/* Derived from fs/exec.c:compute_creds. */
-	kernel_cap_t new_permitted, working;
+	kernel_cap_t new_permitted, new_effective, working;
+	uid_t old_ruid, old_euid, old_suid;
 
+	/* P'(per) = (P(inh) & F(inh)) | (F(per) & bset) */
 	new_permitted = cap_intersect (bprm->cap_permitted, cap_bset);
 	working = cap_intersect (bprm->cap_inheritable,
 				 current->cap_inheritable);
 	new_permitted = cap_combine (new_permitted, working);
 
+	/* P'(eff) = (P(inh) & P(eff) & F(inh)) | (F(per) & F(eff) & bset) */
+	new_effective = cap_intersect (bprm->cap_permitted, bprm->cap_effective);
+	new_effective = cap_intersect (new_effective, cap_bset);
+	working = cap_intersect (bprm->cap_inheritable,
+				 current->cap_effective);
+	working = cap_intersect (working, current->cap_inheritable);
+	new_effective = cap_combine (new_effective, working);
+
+	/* P'(inh) = P'(per) */
+
 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
 	    !cap_issubset (new_permitted, current->cap_permitted)) {
 		current->mm->dumpable = suid_dumpable;
@@ -159,36 +281,37 @@ void cap_bprm_apply_creds (struct linux_
 			if (!capable (CAP_SETPCAP)) {
 				new_permitted = cap_intersect (new_permitted,
 							current->cap_permitted);
+				new_effective = cap_intersect (new_permitted,
+							       new_effective);
 			}
 		}
 	}
 
+	old_ruid = current->uid;
+	old_euid = current->euid;
+	old_suid = current->suid;
 	current->suid = current->euid = current->fsuid = bprm->e_uid;
 	current->sgid = current->egid = current->fsgid = bprm->e_gid;
 
-	/* For init, we want to retain the capabilities set
-	 * in the init_task struct. Thus we skip the usual
-	 * capability rules */
-	if (current->pid != 1) {
-		current->cap_permitted = new_permitted;
-		current->cap_effective =
-		    cap_intersect (new_permitted, bprm->cap_effective);
-	}
-
-	/* AUD: Audit candidate if current->cap_effective is set */
+	current->cap_permitted = new_permitted;
+	current->cap_effective = new_effective;
+	current->cap_inheritable = new_permitted;
 
 	current->keep_capabilities = 0;
+	/* Make sure we drop capabilities if required by suid. */
+	cap_task_post_setuid (old_ruid, old_euid, old_suid, LSM_SETID_RES);
+
+	/* AUD: Audit candidate if current->cap_effective is set */
 }
 
 int cap_bprm_secureexec (struct linux_binprm *bprm)
 {
 	/* If/when this module is enhanced to incorporate capability
 	   bits on files, the test below should be extended to also perform a 
-	   test between the old and new capability sets.  For now,
-	   it simply preserves the legacy decision algorithm used by
-	   the old userland. */
-	return (current->euid != current->uid ||
-		current->egid != current->gid);
+	   test between the old and new capability sets. */
+	return ((bprm->is_suid || bprm->is_sgid)
+		&& !cap_issubset (bprm->cap_permitted,
+				  current->cap_permitted));
 }
 
 int cap_inode_setxattr(struct dentry *dentry, char *name, void *value,
@@ -244,15 +367,24 @@ static inline void cap_emulate_setxuid (
 					int old_suid)
 {
 	if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
-	    (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
-	    !current->keep_capabilities) {
-		cap_clear (current->cap_permitted);
-		cap_clear (current->cap_effective);
+	    (current->uid != 0 && current->euid != 0 && current->suid != 0)) {
+		if (!current->keep_capabilities) {
+			current->cap_permitted
+				= cap_intersect (current->cap_permitted,
+						 CAP_REGULAR_SET);
+			current->cap_effective
+				= cap_intersect (current->cap_effective,
+						 CAP_REGULAR_SET);
+		}
+		current->cap_inheritable
+			= cap_intersect (current->cap_inheritable,
+					 CAP_REGULAR_SET);
 	}
-	if (old_euid == 0 && current->euid != 0) {
-		cap_clear (current->cap_effective);
+	if (old_euid == 0 && current->euid != 0 && !current->keep_capabilities) {
+		current->cap_effective = cap_intersect (current->cap_effective,
+							CAP_REGULAR_SET);
 	}
-	if (old_euid != 0 && current->euid == 0) {
+	if (old_euid != 0 && current->euid == 0 && !current->keep_capabilities) {
 		current->cap_effective = current->cap_permitted;
 	}
 }
@@ -300,6 +432,62 @@ int cap_task_post_setuid (uid_t old_ruid
 	return 0;
 }
 
+/*
+ * Rationale: the callers of task_setscheduler, task_setioprio and
+ * task_setnice assume that
+ *   . if capable(CAP_SYS_NICE), the action is allowed, and
+ *   . if not capable(CAP_SYS_NICE) but acting on one of your own
+ *     processes, the action is allowed as well.
+ * That is no longer sufficient: a program may now run without suid and
+ * yet with increased capabilities.
+ * So we also check whether the target process has increased caps.
+ */
+static inline int cap_safe_nice(struct task_struct *p)
+{
+	if (cap_issubset(p->cap_permitted, current->cap_permitted))
+		return 0;
+
+	return __capable(current, CAP_SYS_NICE) ? 0 : -EPERM;
+}
+
+int cap_task_setscheduler (struct task_struct *p, int policy,
+			   struct sched_param *lp)
+{
+	return cap_safe_nice(p);	/* policy and lp are not examined */
+}
+
+int cap_task_setioprio (struct task_struct *p, int ioprio)
+{
+	return cap_safe_nice(p);	/* ioprio is not examined */
+}
+
+int cap_task_setnice (struct task_struct *p, int nice)
+{
+	return cap_safe_nice(p);	/* nice is not examined */
+}
+
+/*
+ * Decide whether current may send a signal to @p.  Kernel-originated
+ * signals are always allowed, as are signals sent on behalf of a
+ * particular user (@secid set): capabilities are ignored there, which
+ * may be wrong but is the only thing we can do at the moment.
+ * Returns 0 if allowed, -EPERM otherwise.
+ */
+int cap_task_kill(struct task_struct *p, struct siginfo *info,
+				int sig, u32 secid)
+{
+	if (info != SEND_SIG_NOINFO && (is_si_special(info) || SI_FROMKERNEL(info)))
+		return 0;
+	if (secid)
+		return 0;
+
+	/* Test the subset first so that capable() is consulted, and
+	 * PF_SUPERPRIV possibly set, only when CAP_KILL is really needed. */
+	if (cap_issubset(p->cap_permitted, current->cap_permitted))
+		return 0;
+	return capable(CAP_KILL) ? 0 : -EPERM;
+}
+
 void cap_task_reparent_to_init (struct task_struct *p)
 {
 	p->cap_effective = CAP_INIT_EFF_SET;
@@ -337,6 +525,10 @@ EXPORT_SYMBOL(cap_bprm_secureexec);
 EXPORT_SYMBOL(cap_inode_setxattr);
 EXPORT_SYMBOL(cap_inode_removexattr);
 EXPORT_SYMBOL(cap_task_post_setuid);
+EXPORT_SYMBOL(cap_task_kill);
+EXPORT_SYMBOL(cap_task_setscheduler);
+EXPORT_SYMBOL(cap_task_setioprio);
+EXPORT_SYMBOL(cap_task_setnice);
 EXPORT_SYMBOL(cap_task_reparent_to_init);
 EXPORT_SYMBOL(cap_syslog);
 EXPORT_SYMBOL(cap_vm_enough_memory);
diff --git a/security/dummy.c b/security/dummy.c
index 58c6d39..572a15b 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -37,11 +37,11 @@ static int dummy_ptrace (struct task_str
 static int dummy_capget (struct task_struct *target, kernel_cap_t * effective,
 			 kernel_cap_t * inheritable, kernel_cap_t * permitted)
 {
-	*effective = *inheritable = *permitted = 0;
+	*effective = *inheritable = *permitted = CAP_REGULAR_SET;
 	if (!issecure(SECURE_NOROOT)) {
 		if (target->euid == 0) {
-			*permitted |= (~0 & ~CAP_FS_MASK);
-			*effective |= (~0 & ~CAP_TO_MASK(CAP_SETPCAP) & ~CAP_FS_MASK);
+			*permitted |= (CAP_FULL_SET & ~CAP_FS_MASK);
+			*effective |= (CAP_FULL_SET & ~CAP_TO_MASK(CAP_SETPCAP) & ~CAP_FS_MASK);
 		}
 		if (target->fsuid == 0) {
 			*permitted |= CAP_FS_MASK;
