diff -rc2P linux-2.4.10/Makefile linux-2.4.10ctx-2/Makefile
*** linux-2.4.10/Makefile Sun Sep 23 13:02:30 2001
--- linux-2.4.10ctx-2/Makefile Tue Oct 9 21:39:54 2001
***************
*** 2,6 ****
PATCHLEVEL = 4
SUBLEVEL = 10
! EXTRAVERSION =
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
--- 2,6 ----
PATCHLEVEL = 4
SUBLEVEL = 10
! EXTRAVERSION =ctx-2
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
diff -rc2P linux-2.4.10/arch/i386/kernel/entry.S linux-2.4.10ctx-2/arch/i386/kernel/entry.S
*** linux-2.4.10/arch/i386/kernel/entry.S Sat Sep 8 15:02:32 2001
--- linux-2.4.10ctx-2/arch/i386/kernel/entry.S Tue Oct 9 21:39:43 2001
***************
*** 620,623 ****
--- 620,625 ----
.long SYMBOL_NAME(sys_getdents64) /* 220 */
.long SYMBOL_NAME(sys_fcntl64)
+ .long SYMBOL_NAME(sys_new_s_context)
+ .long SYMBOL_NAME(sys_set_ipv4root)
.long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */
diff -rc2P linux-2.4.10/arch/i386/kernel/ptrace.c linux-2.4.10ctx-2/arch/i386/kernel/ptrace.c
*** linux-2.4.10/arch/i386/kernel/ptrace.c Tue Sep 18 20:04:23 2001
--- linux-2.4.10ctx-2/arch/i386/kernel/ptrace.c Tue Oct 9 21:39:43 2001
***************
*** 171,175 ****
get_task_struct(child);
read_unlock(&tasklist_lock);
! if (!child)
goto out;
--- 171,175 ----
get_task_struct(child);
read_unlock(&tasklist_lock);
! if (!child || child->s_context != current->s_context)
goto out;
diff -rc2P linux-2.4.10/fs/exec.c linux-2.4.10ctx-2/fs/exec.c
*** linux-2.4.10/fs/exec.c Tue Sep 18 16:39:32 2001
--- linux-2.4.10ctx-2/fs/exec.c Tue Oct 9 21:39:43 2001
***************
*** 685,689 ****
int do_unlock = 0;
! new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
working = cap_intersect(bprm->cap_inheritable,
current->cap_inheritable);
--- 685,689 ----
int do_unlock = 0;
! new_permitted = cap_intersect(bprm->cap_permitted, current->cap_bset);
working = cap_intersect(bprm->cap_inheritable,
current->cap_inheritable);
diff -rc2P linux-2.4.10/fs/proc/array.c linux-2.4.10ctx-2/fs/proc/array.c
*** linux-2.4.10/fs/proc/array.c Wed Sep 19 19:18:31 2001
--- linux-2.4.10ctx-2/fs/proc/array.c Tue Oct 9 21:39:43 2001
***************
*** 263,270 ****
return buffer + sprintf(buffer, "CapInh:\t%016x\n"
"CapPrm:\t%016x\n"
! "CapEff:\t%016x\n",
cap_t(p->cap_inheritable),
cap_t(p->cap_permitted),
! cap_t(p->cap_effective));
}
--- 263,272 ----
return buffer + sprintf(buffer, "CapInh:\t%016x\n"
"CapPrm:\t%016x\n"
! "CapEff:\t%016x\n"
! "CapBset:\t%016x\n",
cap_t(p->cap_inheritable),
cap_t(p->cap_permitted),
! cap_t(p->cap_effective),
! cap_t(p->cap_bset));
}
***************
*** 288,291 ****
--- 290,295 ----
buffer = task_sig(task, buffer);
buffer = task_cap(task, buffer);
+ buffer += sprintf (buffer,"s_context: %d\n",task->s_context);
+ buffer += sprintf (buffer,"ipv4root: %08lx\n",task->ipv4root);
#if defined(CONFIG_ARCH_S390)
buffer = task_show_regs(task, buffer);
diff -rc2P linux-2.4.10/fs/proc/base.c linux-2.4.10ctx-2/fs/proc/base.c
*** linux-2.4.10/fs/proc/base.c Fri Jul 20 15:39:56 2001
--- linux-2.4.10ctx-2/fs/proc/base.c Tue Oct 9 21:39:43 2001
***************
*** 1036,1039 ****
--- 1036,1049 ----
if (!pid)
continue;
+ /* Even if pid 1 is not part of the security context, */
+ /* we show it anyway. This makes the security box */
+ /* more standard (and helps pstree do its job). */
+ /* So the current process "knows" pid 1 exists anyway, but */
+ /* cannot send it any signal either */
+
+ /* A process with security context 1 can see all processes */
+ if (pid != 1
+ && current->s_context != 1
+ && p->s_context != current->s_context) continue;
if (--index >= 0)
continue;
diff -rc2P linux-2.4.10/include/asm-i386/unistd.h linux-2.4.10ctx-2/include/asm-i386/unistd.h
*** linux-2.4.10/include/asm-i386/unistd.h Fri Aug 11 17:39:23 2000
--- linux-2.4.10ctx-2/include/asm-i386/unistd.h Tue Oct 9 21:39:43 2001
***************
*** 228,231 ****
--- 228,233 ----
#define __NR_getdents64 220
#define __NR_fcntl64 221
+ #define __NR_new_s_context 222
+ #define __NR_set_ipv4root 223
/* user-visible error numbers are in the range -1 - -124: see <asm-i386/errno.h> */
diff -rc2P linux-2.4.10/include/linux/capability.h linux-2.4.10ctx-2/include/linux/capability.h
*** linux-2.4.10/include/linux/capability.h Sun Sep 23 13:31:02 2001
--- linux-2.4.10ctx-2/include/linux/capability.h Tue Oct 9 21:39:43 2001
***************
*** 232,235 ****
--- 232,236 ----
arbitrary SCSI commands */
/* Allow setting encryption key on loopback filesystem */
+ /* Allow the selection of a security context */
#define CAP_SYS_ADMIN 21
***************
*** 277,280 ****
--- 278,285 ----
#define CAP_LEASE 28
+
+ /* Allow opening special device file */
+
+ #define CAP_OPENDEV 29
#ifdef __KERNEL__
diff -rc2P linux-2.4.10/include/linux/sched.h linux-2.4.10ctx-2/include/linux/sched.h
*** linux-2.4.10/include/linux/sched.h Sun Sep 23 13:31:02 2001
--- linux-2.4.10ctx-2/include/linux/sched.h Tue Oct 9 21:39:43 2001
***************
*** 276,279 ****
--- 276,293 ----
__user; })
+
+ /*
+ We may have a different domainname and nodename for each security
+ context. By default, a security context shares the same ones as its
+ parent, potentially the information in system_utsname
+ */
+ struct context_info{
+ int refcount; /* tasks pointing here; see sys_assign_s_info/sys_release_s_info */
+ int s_context; /* context id, copied from current->s_context on allocation */
+ char nodename[65]; /* host name reported to tasks using this struct */
+ char domainname[65]; /* domain name reported to tasks using this struct */
+ };
+
+
extern struct user_struct root_user;
#define INIT_USER (&root_user)
***************
*** 400,403 ****
--- 414,423 ----
/* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock;
+ /* Field to make virtual server running in chroot more isolated */
+ int s_context; /* Process can only deal with other processes */
+ /* with the same s_context */
+ __u32 cap_bset; /* Maximum capability of this process and children */
+ unsigned long ipv4root; /* Process can only bind to this iP */
+ struct context_info *s_info;
};
***************
*** 486,490 ****
pending: { NULL, &tsk.pending.head, {{0}}}, \
blocked: {{0}}, \
! alloc_lock: SPIN_LOCK_UNLOCKED \
}
--- 506,511 ----
pending: { NULL, &tsk.pending.head, {{0}}}, \
blocked: {{0}}, \
! alloc_lock: SPIN_LOCK_UNLOCKED, \
! cap_bset: CAP_INIT_EFF_SET, \
}
***************
*** 899,902 ****
--- 920,928 ----
return res;
}
+
+ /* Manage the reference count of the context_info pointer */
+ void sys_release_s_info (struct task_struct *);
+ void sys_assign_s_info (struct task_struct *);
+ void sys_alloc_s_info (void);
#endif /* __KERNEL__ */
diff -rc2P linux-2.4.10/include/net/route.h linux-2.4.10ctx-2/include/net/route.h
*** linux-2.4.10/include/net/route.h Sun Sep 23 13:31:33 2001
--- linux-2.4.10ctx-2/include/net/route.h Tue Oct 9 21:39:43 2001
***************
*** 161,164 ****
--- 161,171 ----
{
int err;
+ if (current->ipv4root != 0){
+ if (src == 0){
+ src = current->ipv4root;
+ }else if (current->ipv4root != src){
+ return -EPERM;
+ }
+ }
err = ip_route_output(rp, dst, src, tos, oif);
if (err || (dst && src))
diff -rc2P linux-2.4.10/kernel/exit.c linux-2.4.10ctx-2/kernel/exit.c
*** linux-2.4.10/kernel/exit.c Mon Sep 10 16:04:33 2001
--- linux-2.4.10ctx-2/kernel/exit.c Tue Oct 9 21:39:43 2001
***************
*** 65,68 ****
--- 65,69 ----
if (current->counter >= MAX_COUNTER)
current->counter = MAX_COUNTER;
+ sys_release_s_info(p);
p->pid = 0;
free_task_struct(p);
diff -rc2P linux-2.4.10/kernel/fork.c linux-2.4.10ctx-2/kernel/fork.c
*** linux-2.4.10/kernel/fork.c Tue Sep 18 00:46:04 2001
--- linux-2.4.10ctx-2/kernel/fork.c Tue Oct 9 21:39:43 2001
***************
*** 584,587 ****
--- 584,588 ----
*p = *current;
+ sys_assign_s_info (p);
retval = -EAGAIN;
diff -rc2P linux-2.4.10/kernel/signal.c linux-2.4.10ctx-2/kernel/signal.c
*** linux-2.4.10/kernel/signal.c Mon Sep 17 19:40:01 2001
--- linux-2.4.10ctx-2/kernel/signal.c Tue Oct 9 21:39:43 2001
***************
*** 593,597 ****
read_lock(&tasklist_lock);
for_each_task(p) {
! if (p->pgrp == pgrp) {
int err = send_sig_info(sig, info, p);
if (retval)
--- 593,597 ----
read_lock(&tasklist_lock);
for_each_task(p) {
! if (p->pgrp == pgrp && p->s_context == current->s_context) {
int err = send_sig_info(sig, info, p);
if (retval)
***************
*** 640,644 ****
p = find_task_by_pid(pid);
error = -ESRCH;
! if (p)
error = send_sig_info(sig, info, p);
read_unlock(&tasklist_lock);
--- 640,644 ----
p = find_task_by_pid(pid);
error = -ESRCH;
! if (p && p->s_context == current->s_context)
error = send_sig_info(sig, info, p);
read_unlock(&tasklist_lock);
***************
*** 664,668 ****
read_lock(&tasklist_lock);
for_each_task(p) {
! if (p->pid > 1 && p != current) {
int err = send_sig_info(sig, info, p);
++count;
--- 664,668 ----
read_lock(&tasklist_lock);
for_each_task(p) {
! if (p->pid > 1 && p != current && p->s_context == current->s_context) {
int err = send_sig_info(sig, info, p);
++count;
***************
*** 1257,1258 ****
--- 1257,1333 ----
}
#endif /* !alpha && !__ia64__ && !defined(__mips__) */
+
+ /*
+ 	Change security context and/or shrink cap_bset of the current process.
+ 	ctx == -1: allocate an unused context id and switch to it.
+ 	ctx == -2: keep the current context, only clear remove_cap.
+ 	ctx 0..MAX_S_CONTEXT: switch to ctx (needs context 0 and CAP_SYS_ADMIN).
+ 	Returns the resulting context id, or -EINVAL/-EPERM.
+ */
+ asmlinkage int
+ sys_new_s_context(int ctx, __u32 remove_cap)
+ {
+ #define MAX_S_CONTEXT	65535	/* Arbitrary limit */
+ 	int ret = -EPERM;
+ 	if (ctx == -1){
+ 		/* Allocate a new id: count up, wrapping around, until one is free */
+ 		static int alloc_ctx=1;
+ 		static spinlock_t alloc_ctx_lock = SPIN_LOCK_UNLOCKED;
+ 		spin_lock(&alloc_ctx_lock);
+ 		while (1){
+ 			int found = 0;
+ 			struct task_struct *p;
+ 			alloc_ctx++;
+ 			/* Never hand out 0 or 1: s_context 1 sees all processes */
+ 			if (alloc_ctx < 2 || alloc_ctx > MAX_S_CONTEXT)
+ 				alloc_ctx = 2;
+ 			/* Check if in use */
+ 			read_lock(&tasklist_lock);
+ 			for_each_task(p) {
+ 				if (p->s_context == alloc_ctx){
+ 					found = 1;
+ 					break;
+ 				}
+ 			}
+ 			read_unlock(&tasklist_lock);
+ 			if (!found) break;
+ 		}
+ 		current->s_context = alloc_ctx;
+ 		current->cap_bset &= (~remove_cap);
+ 		ret = alloc_ctx;
+ 		/* The id is claimed: unlock first, sys_alloc_s_info() may sleep */
+ 		spin_unlock(&alloc_ctx_lock);
+ 		sys_alloc_s_info();
+ 	}else if (ctx == -2){
+ 		/* We keep the same s_context, but lower the capabilities */
+ 		current->cap_bset &= (~remove_cap);
+ 		ret = current->s_context;
+ 	}else if (ctx < 0 || ctx > MAX_S_CONTEXT){
+ 		ret = -EINVAL;
+ 	}else if (current->s_context == 0 && capable(CAP_SYS_ADMIN)){
+ 		if (ctx != 0){
+ 			/* The root context can become any context it wants */
+ 			int found = 0;
+ 			struct task_struct *p;
+ 			current->s_context = ctx;
+ 			current->cap_bset &= (~remove_cap);
+ 			/* Reuse the context_info of another task in ctx; skip ourselves, else found is always set */
+ 			/* NOTE(review): the helpers below take uts_sem (may sleep) under tasklist_lock */
+ 			read_lock(&tasklist_lock);
+ 			for_each_task(p) {
+ 				if (p != current && p->s_context == ctx){
+ 					found = 1;
+ 					sys_release_s_info(current);
+ 					sys_assign_s_info (p);
+ 					current->s_info = p->s_info;
+ 					break;
+ 				}
+ 			}
+ 			read_unlock(&tasklist_lock);
+ 			if (!found) sys_alloc_s_info();
+ 		}
+ 		ret = ctx;
+ 	}
+ 	return ret;
+ }
+
diff -rc2P linux-2.4.10/kernel/sys.c linux-2.4.10ctx-2/kernel/sys.c
*** linux-2.4.10/kernel/sys.c Tue Sep 18 17:10:43 2001
--- linux-2.4.10ctx-2/kernel/sys.c Tue Oct 9 21:39:43 2001
***************
*** 1016,1022 ****
{
int errno = 0;
down_read(&uts_sem);
! if (copy_to_user(name,&system_utsname,sizeof *name))
errno = -EFAULT;
up_read(&uts_sem);
--- 1016,1031 ----
{
int errno = 0;
+ struct new_utsname tmp,*pttmp;
down_read(&uts_sem);
! if (current->s_info != NULL){
! tmp = system_utsname;
! strcpy (tmp.nodename,current->s_info->nodename);
! strcpy (tmp.domainname,current->s_info->domainname);
! pttmp = &tmp;
! }else{
! pttmp = &system_utsname;
! }
! if (copy_to_user(name,pttmp,sizeof *name))
errno = -EFAULT;
up_read(&uts_sem);
***************
*** 1024,1030 ****
--- 1033,1091 ----
}
+ /*
+ 	Drop one reference on the context_info of task p (if it has one).
+ 	The struct is freed, and p->s_info cleared, once no reference is left.
+ */
+ void sys_release_s_info (struct task_struct *p)
+ {
+ 	struct context_info *info;
+ 	down_write (&uts_sem);		/* refcount updates are done under uts_sem */
+ 	info = p->s_info;
+ 	/* When other references remain, p->s_info is left unchanged */
+ 	if (info != NULL && --info->refcount == 0){
+ 		/* Last user is gone */
+ 		vfree (info);
+ 		p->s_info = NULL;
+ 	}
+ 	up_write (&uts_sem);
+ }
+ /* Take one more reference on the context_info of task p, if it has one */
+ void sys_assign_s_info (struct task_struct *p)
+ {
+ 	struct context_info *info;
+ 	down_write (&uts_sem);
+ 	info = p->s_info;
+ 	if (info) info->refcount += 1;
+ 	up_write (&uts_sem);
+ }
+
+ /*
+ 	Give the current process a fresh context_info copied from the names it
+ 	uses now, then drop its old one. On vmalloc failure nothing changes.
+ */
+ void sys_alloc_s_info(void)
+ {
+ 	struct context_info *s_info = vmalloc(sizeof *s_info);
+ 	if (s_info == NULL) return;	/* keep the old s_info rather than oops */
+ 	s_info->s_context = current->s_context;
+ 	s_info->refcount = 1;
+ 	down_read (&uts_sem);
+ 	if (current->s_info != NULL){
+ 		strcpy (s_info->nodename,current->s_info->nodename);
+ 		strcpy (s_info->domainname,current->s_info->domainname);
+ 	}else{
+ 		strcpy (s_info->nodename,system_utsname.nodename);
+ 		strcpy (s_info->domainname,system_utsname.domainname);
+ 	}
+ 	up_read (&uts_sem);
+ 	sys_release_s_info (current);
+ 	current->s_info = s_info;
+ }
+
+
asmlinkage long sys_sethostname(char *name, int len)
{
int errno;
+ char *nodename;
if (!capable(CAP_SYS_ADMIN))
***************
*** 1034,1039 ****
down_write(&uts_sem);
errno = -EFAULT;
! if (!copy_from_user(system_utsname.nodename, name, len)) {
! system_utsname.nodename[len] = 0;
errno = 0;
}
--- 1095,1102 ----
down_write(&uts_sem);
errno = -EFAULT;
! nodename = system_utsname.nodename;
! if (current->s_info) nodename = current->s_info->nodename;
! if (!copy_from_user(nodename, name, len)) {
! nodename[len] = 0;
errno = 0;
}
***************
*** 1045,1057 ****
{
int i, errno;
if (len < 0)
return -EINVAL;
down_read(&uts_sem);
! i = 1 + strlen(system_utsname.nodename);
if (i > len)
i = len;
errno = 0;
! if (copy_to_user(name, system_utsname.nodename, i))
errno = -EFAULT;
up_read(&uts_sem);
--- 1108,1123 ----
{
int i, errno;
+ char *nodename;
if (len < 0)
return -EINVAL;
down_read(&uts_sem);
! nodename = system_utsname.nodename;
! if (current->s_info != NULL) nodename = current->s_info->nodename;
! i = 1 + strlen(nodename);
if (i > len)
i = len;
errno = 0;
! if (copy_to_user(name, nodename, i))
errno = -EFAULT;
up_read(&uts_sem);
***************
*** 1066,1069 ****
--- 1132,1136 ----
{
int errno;
+ char *domainname;
if (!capable(CAP_SYS_ADMIN))
***************
*** 1073,1080 ****
down_write(&uts_sem);
errno = -EFAULT;
! if (!copy_from_user(system_utsname.domainname, name, len)) {
errno = 0;
! system_utsname.domainname[len] = 0;
}
up_write(&uts_sem);
--- 1140,1149 ----
down_write(&uts_sem);
+ domainname = system_utsname.domainname;
+ if (current->s_info) domainname = current->s_info->domainname;
errno = -EFAULT;
! if (!copy_from_user(domainname, name, len)) {
errno = 0;
! domainname[len] = 0;
}
up_write(&uts_sem);
diff -rc2P linux-2.4.10/kernel/sysctl.c linux-2.4.10ctx-2/kernel/sysctl.c
*** linux-2.4.10/kernel/sysctl.c Tue Sep 18 17:10:43 2001
--- linux-2.4.10ctx-2/kernel/sysctl.c Tue Oct 9 21:35:23 2001
***************
*** 375,378 ****
--- 375,379 ----
static int test_perm(int mode, int op)
{
+ if (!capable(CAP_SYS_ADMIN)) mode &= ~(0222);
if (!current->euid)
mode >>= 6;
***************
*** 789,793 ****
--- 790,805 ----
{
int r;
+ ctl_table tmp;
+ /* HACK for per s_context hostname and domainname */
+ if (current->s_info != NULL){
+ tmp = *table;
+ table = &tmp;
+ if (table->data == (void*)&system_utsname.nodename){
+ tmp.data = &current->s_info->nodename;
+ }else if (table->data == (void*)&system_utsname.domainname){
+ tmp.data = &current->s_info->domainname;
+ }
+ }
if (!write) {
down_read(&uts_sem);
diff -rc2P linux-2.4.10/net/ipv4/af_inet.c linux-2.4.10ctx-2/net/ipv4/af_inet.c
*** linux-2.4.10/net/ipv4/af_inet.c Tue Aug 7 11:30:50 2001
--- linux-2.4.10ctx-2/net/ipv4/af_inet.c Tue Oct 9 21:39:43 2001
***************
*** 474,477 ****
--- 474,478 ----
int chk_addr_ret;
int err;
+ __u32 s_addr;
/* If the socket has its own bind function then use it. (RAW) */
***************
*** 482,486 ****
return -EINVAL;
! chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
/* Not specified by any standard per-se, however it breaks too
--- 483,497 ----
return -EINVAL;
! s_addr = addr->sin_addr.s_addr;
! if (current->ipv4root != 0){
! // printk ("ipv4root0 %08lx %08x\n",current->ipv4root,s_addr);
! if (s_addr == 0){
! s_addr = current->ipv4root;
! }else if (s_addr != current->ipv4root){
! return -EADDRNOTAVAIL;
! }
! }
! chk_addr_ret = inet_addr_type(s_addr);
! // printk ("ipv4root %08lx %08x %d\n",current->ipv4root,s_addr,chk_addr_ret);
/* Not specified by any standard per-se, however it breaks too
***************
*** 493,497 ****
if (sysctl_ip_nonlocal_bind == 0 &&
sk->protinfo.af_inet.freebind == 0 &&
! addr->sin_addr.s_addr != INADDR_ANY &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
--- 504,508 ----
if (sysctl_ip_nonlocal_bind == 0 &&
sk->protinfo.af_inet.freebind == 0 &&
! s_addr != INADDR_ANY &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
***************
*** 518,522 ****
goto out;
! sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
sk->saddr = 0; /* Use device */
--- 529,533 ----
goto out;
! sk->rcv_saddr = sk->saddr = s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
sk->saddr = 0; /* Use device */
diff -rc2P linux-2.4.10/net/socket.c linux-2.4.10ctx-2/net/socket.c
*** linux-2.4.10/net/socket.c Tue Aug 28 13:56:06 2001
--- linux-2.4.10ctx-2/net/socket.c Tue Oct 9 21:39:43 2001
***************
*** 1766,1767 ****
--- 1766,1779 ----
return len;
}
+
+ /* Set the ipv4root address of the current process to ip. */
+ /* A process that already has one needs CAP_SYS_ADMIN to replace it, else -EPERM. */
+ asmlinkage int sys_set_ipv4root (unsigned long ip)
+ {
+ 	int allowed = (current->ipv4root == 0) || capable(CAP_SYS_ADMIN);
+ 	if (!allowed)
+ 		return -EPERM;
+ 	current->ipv4root = ip;
+ 	return 0;
+ }
+