From 3e1866410f11356a9fd869beb3e95983dc79c067 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 13 Aug 2014 01:33:38 -0700 Subject: mnt: Implicitly add MNT_NODEV on remount when it was implicitly added by mount Now that remount is properly enforcing the rule that you can't remove nodev, at least sandstorm.io is breaking when performing a remount. It turns out that there is an easy intuitive solution: implicitly add nodev on remount when nodev was implicitly added on mount. Tested-by: Cedric Bosdonnat Tested-by: Richard Weinberger Cc: stable@vger.kernel.org Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 5b66b2b3624..3a1a87dc33d 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2098,7 +2098,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags, } if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) && !(mnt_flags & MNT_NODEV)) { - return -EPERM; + /* Was the nodev implicitly added in mount? */ + if ((mnt->mnt_ns->user_ns != &init_user_ns) && + !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) { + mnt_flags |= MNT_NODEV; + } else { + return -EPERM; + } } if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) && !(mnt_flags & MNT_NOSUID)) { -- cgit v1.2.3-70-g09d2 From b2f5d4dc38e034eecb7987e513255265ff9aa1cf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 4 Oct 2014 14:44:03 -0700 Subject: umount: Disallow unprivileged mount force Forced unmount affects not just the mount namespace but the underlying superblock as well. Restrict forced unmount to the global root user for now. Otherwise it becomes possible for a user in a less privileged mount namespace to force the shutdown of a superblock of a filesystem in a more privileged mount namespace, allowing a DOS attack on root. Cc: stable@vger.kernel.org Signed-off-by: "Eric W. 
Biederman" --- fs/namespace.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 3a1a87dc33d..43b16af8af3 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1544,6 +1544,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; if (mnt->mnt.mnt_flags & MNT_LOCKED) goto dput_and_out; + retval = -EPERM; + if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + goto dput_and_out; retval = do_umount(mnt, flags); dput_and_out: -- cgit v1.2.3-70-g09d2 From da362b09e42ee0bcaf0356afee6078b4f324baff Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Oct 2014 12:19:53 -0700 Subject: umount: Do not allow unmounting rootfs. Andrew Vagin writes: > #define _GNU_SOURCE > #include > #include > #include > #include > #include > #include > > int main(int argc, char **argv) > { > int fd; > > fd = open("/proc/self/ns/mnt", O_RDONLY); > if (fd < 0) > return 1; > while (1) { > if (umount2("/", MNT_DETACH) || > setns(fd, CLONE_NEWNS)) > break; > } > > return 0; > } > > root@ubuntu:/home/avagin# gcc -Wall nsenter.c -o nsenter > root@ubuntu:/home/avagin# strace ./nsenter > execve("./nsenter", ["./nsenter"], [/* 22 vars */]) = 0 > ... > open("/proc/self/ns/mnt", O_RDONLY) = 3 > umount("/", MNT_DETACH) = 0 > setns(3, 131072) = 0 > umount("/", MNT_DETACH > causes: > [ 260.548301] ------------[ cut here ]------------ > [ 260.550941] kernel BUG at /build/buildd/linux-3.13.0/fs/pnode.c:372! 
> [ 260.552068] invalid opcode: 0000 [#1] SMP > [ 260.552068] Modules linked in: xt_CHECKSUM iptable_mangle xt_tcpudp xt_addrtype xt_conntrack ipt_MASQUERADE iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack bridge stp llc dm_thin_pool dm_persistent_data dm_bufio dm_bio_prison iptable_filter ip_tables x_tables crct10dif_pclmul crc32_pclmul ghash_clmulni_intel binfmt_misc nfsd auth_rpcgss nfs_acl aesni_intel nfs lockd aes_x86_64 sunrpc fscache lrw gf128mul glue_helper ablk_helper cryptd serio_raw ppdev parport_pc lp parport btrfs xor raid6_pq libcrc32c psmouse floppy > [ 260.552068] CPU: 0 PID: 1723 Comm: nsenter Not tainted 3.13.0-30-generic #55-Ubuntu > [ 260.552068] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > [ 260.552068] task: ffff8800376097f0 ti: ffff880074824000 task.ti: ffff880074824000 > [ 260.552068] RIP: 0010:[] [] propagate_umount+0x123/0x130 > [ 260.552068] RSP: 0018:ffff880074825e98 EFLAGS: 00010246 > [ 260.552068] RAX: ffff88007c741140 RBX: 0000000000000002 RCX: ffff88007c741190 > [ 260.552068] RDX: ffff88007c741190 RSI: ffff880074825ec0 RDI: ffff880074825ec0 > [ 260.552068] RBP: ffff880074825eb0 R08: 00000000000172e0 R09: ffff88007fc172e0 > [ 260.552068] R10: ffffffff811cc642 R11: ffffea0001d59000 R12: ffff88007c741140 > [ 260.552068] R13: ffff88007c741140 R14: ffff88007c741140 R15: 0000000000000000 > [ 260.552068] FS: 00007fd5c7e41740(0000) GS:ffff88007fc00000(0000) knlGS:0000000000000000 > [ 260.552068] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 260.552068] CR2: 00007fd5c7968050 CR3: 0000000070124000 CR4: 00000000000406f0 > [ 260.552068] Stack: > [ 260.552068] 0000000000000002 0000000000000002 ffff88007c631000 ffff880074825ed8 > [ 260.552068] ffffffff811dcfac ffff88007c741140 0000000000000002 ffff88007c741160 > [ 260.552068] ffff880074825f38 ffffffff811dd12b ffffffff811cc642 0000000075640000 > [ 260.552068] Call Trace: > [ 260.552068] [] umount_tree+0x20c/0x260 > [ 260.552068] [] do_umount+0x12b/0x300 > 
[ 260.552068] [] ? final_putname+0x22/0x50 > [ 260.552068] [] ? putname+0x29/0x40 > [ 260.552068] [] SyS_umount+0xdc/0x100 > [ 260.552068] [] tracesys+0xe1/0xe6 > [ 260.552068] Code: 89 50 08 48 8b 50 08 48 89 02 49 89 45 08 e9 72 ff ff ff 0f 1f 44 00 00 4c 89 e6 4c 89 e7 e8 f5 f6 ff ff 48 89 c3 e9 39 ff ff ff <0f> 0b 66 2e 0f 1f 84 00 00 00 00 00 90 66 66 66 66 90 55 b8 01 > [ 260.552068] RIP [] propagate_umount+0x123/0x130 > [ 260.552068] RSP > [ 260.611451] ---[ end trace 11c33d85f1d4c652 ]-- Which in practice is totally uninteresting. Only the global root user can do it, and it is just a stupid thing to do. However that is no excuse to allow a silly way to oops the kernel. We can avoid this silly problem by setting MNT_LOCKED on the rootfs mount point and thus avoid needing any special cases in the unmount code. Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 43b16af8af3..15d0328bd03 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3011,6 +3011,7 @@ static void __init init_mount_tree(void) root.mnt = mnt; root.dentry = mnt->mnt_root; + mnt->mnt_flags |= MNT_LOCKED; set_fs_pwd(current->fs, &root); set_fs_root(current->fs, &root); -- cgit v1.2.3-70-g09d2 From 8486a7882b5ba906992fd78bbfcefaae7fe285cc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Oct 2014 16:22:52 -0700 Subject: mnt: Move the clear of MNT_LOCKED from copy_tree to its callers. Clear MNT_LOCKED in the callers of copy_tree except copy_mnt_ns and collect_mounts. In copy_mnt_ns it is necessary to create an exact copy of a mount tree, so not clearing MNT_LOCKED is important. Similarly collect_mounts is used to take a snapshot of the mount tree for audit logging purposes and auditing using a faithful copy of the tree is important. This becomes particularly significant when we start setting MNT_LOCKED on rootfs to prevent it from being unmounted. 
Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 1 - fs/pnode.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 15d0328bd03..e8d1ffa7f13 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1613,7 +1613,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, if (IS_ERR(q)) return q; - q->mnt.mnt_flags &= ~MNT_LOCKED; q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; diff --git a/fs/pnode.c b/fs/pnode.c index aae331a5d03..260ac8f898a 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -242,6 +242,7 @@ static int propagate_one(struct mount *m) child = copy_tree(last_source, last_source->mnt.mnt_root, type); if (IS_ERR(child)) return PTR_ERR(child); + child->mnt.mnt_flags &= ~MNT_LOCKED; mnt_set_mountpoint(m, mp, child); last_dest = m; last_source = child; -- cgit v1.2.3-70-g09d2 From 381cacb12c009864993a072eedcc0720315aedbd Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 7 Oct 2014 17:11:46 -0700 Subject: mnt: Carefully set CL_UNPRIVILEGED in clone_mnt old->mnt_expire should be ignored unless CL_EXPIRE is set. Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index e8d1ffa7f13..f87a90b98da 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, } /* Don't allow unprivileged users to reveal what is under a mount */ - if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire)) + if ((flag & CL_UNPRIVILEGED) && + (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire))) mnt->mnt.mnt_flags |= MNT_LOCKED; atomic_inc(&sb->s_active); -- cgit v1.2.3-70-g09d2 From 4fed655c410cc56add64c7b1f7c85c7c56066ac2 Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Wed, 8 Oct 2014 10:42:57 -0700 Subject: mnt: Clear mnt_expire during pivot_root When inspecting the pivot_root and the current mount expiry logic I realized that pivot_root fails to clear mnt_expire like mount move does. Add the missing line in case someone does the interesting feat of moving an expirable submount. This gives a strong guarantee that the root of the filesystem tree will never expire. Signed-off-by: "Eric W. Biederman" --- fs/namespace.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index f87a90b98da..fe1c77145a7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2967,6 +2967,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp); touch_mnt_namespace(current->nsproxy->mnt_ns); + /* A moved mount should not expire automatically */ + list_del_init(&new_mnt->mnt_expire); unlock_mount_hash(); chroot_fs_refs(&root, &new); put_mountpoint(root_mp); -- cgit v1.2.3-70-g09d2