Projects
Mega:23.03
systemd
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
Expand all
Collapse all
Changes of Revision 2
View file
_service:tar_scm:systemd.spec
Changed
@@ -21,7 +21,7 @@ Name: systemd Url: https://www.freedesktop.org/wiki/Software/systemd Version: 249 -Release: 48 +Release: 49 License: MIT and LGPLv2+ and GPLv2+ Summary: System and Service Manager @@ -441,6 +441,15 @@ Patch6394: backport-shared-condition-avoid-nss-lookup-in-PID1.patch Patch6395: backport-logind-fix-getting-property-OnExternalPower-via-D-Bu.patch Patch6396: backport-udev-support-by-path-devlink-for-multipath-nvme-bloc.patch +Patch6397: backport-homework-add-new-helper-call-that-can-shift-home-dir.patch +Patch6398: backport-homed-when-using-id-mapping-on-the-home-dirs-also-do.patch +Patch6399: backport-homed-permit-inodes-owned-by-UID_MAPPED_ROOT-to-be-c.patch +Patch6400: backport-nspawn-add-filesystem-id-mapping-support-to-bind-and.patch +Patch6401: backport-namespace-util-introduce-userns_acquire-as-helper-fo.patch +Patch6402: backport-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch +Patch6403: backport-nspawn-make-more-stuff-const.patch +Patch6404: backport-nspawn-if-we-refuse-to-operate-on-some-directory-exp.patch +Patch6405: backport-base-filesystem-use-uid_is_valid-at-one-more-place.patch Patch9001: update-rtc-with-system-clock-when-shutdown.patch Patch9002: udev-add-actions-while-rename-netif-failed.patch @@ -1911,6 +1920,9 @@ %{_libdir}/security/pam_systemd.so %changelog +* Tue Mar 21 2023 zhangyao <zhangyao108@huawei.com> -249-49 +- nspawn: uidmap mount fix + * Tue Mar 7 2023 wangyuhang <wangyuhang27@huawei.com> -249-48 - fix symlinks to NVMe drives are missing in /dev/disk/by-path
View file
_service:tar_scm:backport-base-filesystem-use-uid_is_valid-at-one-more-place.patch
Added
@@ -0,0 +1,25 @@ +From 264caae299aa8f42f20460ad3280add657a3747f Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Thu, 17 Mar 2022 12:03:03 +0100 +Subject: PATCH base-filesystem: use uid_is_valid() at one more place + +--- + src/shared/base-filesystem.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/shared/base-filesystem.c b/src/shared/base-filesystem.c +index 5f5328c8cf..3a77f37a69 100644 +--- a/src/shared/base-filesystem.c ++++ b/src/shared/base-filesystem.c +@@ -176,7 +176,7 @@ int base_filesystem_create(const char *root, uid_t uid, gid_t gid) { + return -errno; + } + +- if (uid != UID_INVALID || gid != UID_INVALID) ++ if (uid_is_valid(uid) || gid_is_valid(gid)) + if (fchownat(fd, tablei.dir, uid, gid, AT_SYMLINK_NOFOLLOW) < 0) + return log_error_errno(errno, "Failed to chown directory at %s/%s: %m", root, tablei.dir); + } +-- +2.27.0 +
View file
_service:tar_scm:backport-homed-permit-inodes-owned-by-UID_MAPPED_ROOT-to-be-c.patch
Added
@@ -0,0 +1,33 @@ +From 0456118807f3afb99bef0249e952f9cdff9fcce2 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Thu, 17 Mar 2022 15:01:11 +0100 +Subject: PATCH homed: permit inodes owned by UID_MAPPED_ROOT to be created + in $HOME + +If people use nspawn in their $HOME we should allow inodes owned by +this special UID to be created temporarily, so that UID mapped nspawn +containers just work. +--- + src/home/homework-mount.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/src/home/homework-mount.c b/src/home/homework-mount.c +index 35645e292e..3095a10ff9 100644 +--- a/src/home/homework-mount.c ++++ b/src/home/homework-mount.c +@@ -216,6 +216,12 @@ static int make_userns(uid_t stored_uid, uid_t exposed_uid) { + if (r < 0) + return log_oom(); + ++ /* Map nspawn's mapped root UID as identity mapping so that people can run nspawn uidmap mounted ++ * containers off $HOME, if they want. */ ++ r = strextendf(&text, UID_FMT " " UID_FMT " " UID_FMT "\n", UID_MAPPED_ROOT, UID_MAPPED_ROOT, 1); ++ if (r < 0) ++ return log_oom(); ++ + /* Leave everything else unmapped, starting from UID_NOBODY itself. Specifically, this means the + * whole space outside of 16bit remains unmapped */ + +-- +2.27.0 +
View file
_service:tar_scm:backport-homed-when-using-id-mapping-on-the-home-dirs-also-do.patch
Added
@@ -0,0 +1,37 @@ +From 1af53c0fa8a945616e371cb9cc75ea882874a09c Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Tue, 15 Feb 2022 18:22:37 +0100 +Subject: PATCH homed: when using id mapping on the home dirs, also do an + identity mapping for the container UID ranges + +Apparently people really want to put high UIDs in their homedirs. Let's +add some minimal support for that. + +Further discussion: https://github.com/systemd/systemd/pull/22239#issuecomment-1040421552 + +Inspired by, based on, and replacing #22239 by Christian Brauner. +--- + src/home/homework-mount.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/src/home/homework-mount.c b/src/home/homework-mount.c +index 0b028dad37..35645e292e 100644 +--- a/src/home/homework-mount.c ++++ b/src/home/homework-mount.c +@@ -209,6 +209,13 @@ static int make_userns(uid_t stored_uid, uid_t exposed_uid) { + if (r < 0) + return log_oom(); + ++ /* Also map the container range. People can use that to place containers owned by high UIDs in their ++ * home directories if they really want. We won't manage this UID range for them but pass it through ++ * 1:1, and it will lose its meaning once migrated between hosts. */ ++ r = append_identity_range(&text, CONTAINER_UID_BASE_MIN, CONTAINER_UID_BASE_MAX+1, stored_uid); ++ if (r < 0) ++ return log_oom(); ++ + /* Leave everything else unmapped, starting from UID_NOBODY itself. Specifically, this means the + * whole space outside of 16bit remains unmapped */ + +-- +2.27.0 +
View file
_service:tar_scm:backport-homework-add-new-helper-call-that-can-shift-home-dir.patch
Added
@@ -0,0 +1,215 @@ +From c7bf079bbc19e3b409acc0c7acc3e14749211fe2 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Wed, 20 Oct 2021 22:12:16 +0200 +Subject: PATCH homework: add new helper call that can shift home dir UID/GID + ranges + +This new helper is not used yet, but it's useful for applying UID/GID +shifts so that the underlying home dir can use an arbitrary UID (for +example "nobody") and we'll still make it appear as owned by the target +UID. + +This operates roughly like this: + +1. The relevant underlying UID is mapped to the target UID +2. Everything in the homed UID range except for the target UID is left + unmapped (and thus will appear as "nobody") +3. Everything in the 16bit UID range outside of the homed UID + range/target UID/nobody user is mapped to itself +4. Everything else is left unmapped (in particular everything outside of + the 16 bit range). + +Why do it like this? + +The 2nd rule is done to ensure that any files from homed's managed UID +range that do not match the user's own UID will be shown as "unmapped" +basically. Of course, IRL this should never happen, except if people +managed to manipulate the underlying fs directly. + +The 3rd rule is to allow that if devs untar an OS image it more or +less just works as before: 16bit UIDs outside of the homed range will +be mapped onto themselves: you can untar things and tar it back up and +things will just work. 
+--- + src/home/homework-mount.c | 144 ++++++++++++++++++++++++++++++++++++++ + src/home/homework-mount.h | 1 + + 2 files changed, 145 insertions(+) + +diff --git a/src/home/homework-mount.c b/src/home/homework-mount.c +index da4f14e..b002489 100644 +--- a/src/home/homework-mount.c ++++ b/src/home/homework-mount.c +@@ -2,13 +2,21 @@ + + #include <sched.h> + #include <sys/mount.h> ++#include <linux/fs.h> + + #include "alloc-util.h" ++#include "fd-util.h" ++#include "format-util.h" ++#include "home-util.h" + #include "homework-mount.h" ++#include "missing_mount.h" ++#include "missing_syscall.h" + #include "mkdir.h" + #include "mount-util.h" ++#include "namespace-util.h" + #include "path-util.h" + #include "string-util.h" ++#include "user-util.h" + + static const char *mount_options_for_fstype(const char *fstype) { + if (streq(fstype, "ext4")) +@@ -95,3 +103,139 @@ int home_move_mount(const char *user_name_and_realm, const char *target) { + log_info("Moving to final mount point %s completed.", target); + return 0; + } ++ ++static int append_identity_range(char **text, uid_t start, uid_t next_start, uid_t exclude) { ++ /* Creates an identity range ranging from 'start' to 'next_start-1'. Excludes the UID specified by 'exclude' if ++ * it is in that range. */ ++ ++ assert(text); ++ ++ if (next_start <= start) /* Empty range? */ ++ return 0; ++ ++ if (exclude < start || exclude >= next_start) /* UID to exclude it outside of the range? */ ++ return strextendf(text, UID_FMT " " UID_FMT " " UID_FMT "\n", start, start, next_start - start); ++ ++ if (start == exclude && next_start == exclude + 1) /* The only UID in the range is the one to exclude? */ ++ return 0; ++ ++ if (exclude == start) /* UID to exclude at beginning of range? */ ++ return strextendf(text, UID_FMT " " UID_FMT " " UID_FMT "\n", start+1, start+1, next_start - start - 1); ++ ++ if (exclude == next_start - 1) /* UID to exclude at end of range? 
*/ ++ return strextendf(text, UID_FMT " " UID_FMT " " UID_FMT "\n", start, start, next_start - start - 1); ++ ++ return strextendf(text, ++ UID_FMT " " UID_FMT " " UID_FMT "\n" ++ UID_FMT " " UID_FMT " " UID_FMT "\n", ++ start, start, exclude - start, ++ exclude + 1, exclude + 1, next_start - exclude - 1); ++} ++ ++static int make_userns(uid_t stored_uid, uid_t exposed_uid) { ++ _cleanup_free_ char *text = NULL; ++ _cleanup_close_ int userns_fd = -1; ++ int r; ++ ++ assert(uid_is_valid(stored_uid)); ++ assert(uid_is_valid(exposed_uid)); ++ ++ assert_cc(HOME_UID_MIN <= HOME_UID_MAX); ++ assert_cc(HOME_UID_MAX < UID_NOBODY); ++ ++ /* Map everything below the homed UID range to itself (except for the UID we actually care about if ++ * it is inside this range) */ ++ r = append_identity_range(&text, 0, HOME_UID_MIN, stored_uid); ++ if (r < 0) ++ return log_oom(); ++ ++ /* Now map the UID we are doing this for to the target UID. */ ++ r = strextendf(&text, UID_FMT " " UID_FMT " " UID_FMT "\n", stored_uid, exposed_uid, 1); ++ if (r < 0) ++ return log_oom(); ++ ++ /* Map everything above the homed UID range to itself (again, excluding the UID we actually care ++ * about if it is in that range). Also we leave "nobody" itself excluded) */ ++ r = append_identity_range(&text, HOME_UID_MAX, UID_NOBODY, stored_uid); ++ if (r < 0) ++ return log_oom(); ++ ++ /* Leave everything else unmapped, starting from UID_NOBODY itself. 
Specifically, this means the ++ * whole space outside of 16bit remains unmapped */ ++ ++ log_debug("Creating userns with mapping:\n%s", text); ++ ++ userns_fd = userns_acquire(text, text); /* same uid + gid mapping */ ++ if (userns_fd < 0) ++ return log_error_errno(userns_fd, "Failed to allocate user namespace: %m"); ++ ++ return TAKE_FD(userns_fd); ++} ++ ++int home_shift_uid(int dir_fd, const char *target, uid_t stored_uid, uid_t exposed_uid, int *ret_mount_fd) { ++ _cleanup_close_ int mount_fd = -1, userns_fd = -1; ++ int r; ++ ++ assert(dir_fd >= 0); ++ assert(uid_is_valid(stored_uid)); ++ assert(uid_is_valid(exposed_uid)); ++ ++ /* Let's try to set up a UID mapping for this directory. This is called when first creating a home ++ * directory or when activating it again. We do this as optimization only, to avoid having to ++ * recursively chown() things on each activation. If the kernel or file system doesn't support this ++ * scheme we'll handle this gracefully, and not do anything, so that the later recursive chown()ing ++ * then fixes up things for us. Note that the chown()ing is smart enough to skip things if they look ++ * alright already. ++ * ++ * Note that this always creates a new mount (i.e. we use OPEN_TREE_CLONE), since applying idmaps is ++ * not allowed once the mount is put in place. 
*/ ++ ++ mount_fd = open_tree(dir_fd, "", AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); ++ if (mount_fd < 0) { ++ if (ERRNO_IS_NOT_SUPPORTED(errno)) { ++ log_debug_errno(errno, "The open_tree() syscall is not supported, not setting up UID shift mount: %m"); ++ ++ if (ret_mount_fd) ++ *ret_mount_fd = -1; ++ ++ return 0; ++ } ++ ++ return log_error_errno(errno, "Failed to open tree of home directory: %m"); ++ } ++ ++ userns_fd = make_userns(stored_uid, exposed_uid); ++ if (userns_fd < 0) ++ return userns_fd; ++ ++ /* Set the user namespace mapping attribute on the cloned mount point */ ++ if (mount_setattr(mount_fd, "", AT_EMPTY_PATH, ++ &(struct mount_attr) { ++ .attr_set = MOUNT_ATTR_IDMAP, ++ .userns_fd = userns_fd, ++ }, MOUNT_ATTR_SIZE_VER0) < 0) { ++ ++ if (ERRNO_IS_NOT_SUPPORTED(errno) || errno == EINVAL) { /* EINVAL is documented in mount_attr() as fs doesn't support idmapping */ ++ log_debug_errno(errno, "UID/GID mapping for shifted mount not available, not setting it up: %m"); ++ ++ if (ret_mount_fd) ++ *ret_mount_fd = -1; ++ ++ return 0; ++ } ++ ++ return log_error_errno(errno, "Failed to apply UID/GID mapping: %m"); ++ } ++ ++ if (target) ++ r = move_mount(mount_fd, "", AT_FDCWD, target, MOVE_MOUNT_F_EMPTY_PATH); ++ else ++ r = move_mount(mount_fd, "", dir_fd, "", MOVE_MOUNT_F_EMPTY_PATH|MOVE_MOUNT_T_EMPTY_PATH); ++ if (r < 0) ++ return log_error_errno(errno, "Failed to apply UID/GID map: %m"); ++ ++ if (ret_mount_fd) ++ *ret_mount_fd = TAKE_FD(mount_fd); ++ ++ return 1; ++} +diff --git a/src/home/homework-mount.h b/src/home/homework-mount.h +index 2a4591c..91c2303 100644 +--- a/src/home/homework-mount.h ++++ b/src/home/homework-mount.h +@@ -6,3 +6,4 @@ + int home_mount_node(const char *node, const char *fstype, bool discard, unsigned long flags); + int home_unshare_and_mount(const char *node, const char *fstype, bool discard, unsigned long flags); + int home_move_mount(const char *user_name_and_realm, const char *target); ++int home_shift_uid(int 
dir_fd, const char *target, uid_t stored_uid, uid_t exposed_uid, int *ret_mount_fd); +-- +2.33.0 +
View file
_service:tar_scm:backport-namespace-util-introduce-userns_acquire-as-helper-fo.patch
Added
@@ -0,0 +1,132 @@ +From 979b0ff2c4dbebb3c04e3bca22be18e82a631098 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Wed, 20 Oct 2021 13:15:27 +0200 +Subject: PATCH namespace-util: introduce userns_acquire() as helper for + allocating new unbound userns + +This returns a namespace fd, and takes a uidmap/gidmap as string. This +is split out of mount-util.c's remount_idmap() logic, so that we can +allocate a userns independently. +--- + src/basic/namespace-util.c | 41 ++++++++++++++++++++++++++++++++++++++ + src/basic/namespace-util.h | 2 ++ + src/shared/mount-util.c | 29 ++++----------------------- + 3 files changed, 47 insertions(+), 25 deletions(-) + +diff --git a/src/basic/namespace-util.c b/src/basic/namespace-util.c +index a55b76d..bf3e91c 100644 +--- a/src/basic/namespace-util.c ++++ b/src/basic/namespace-util.c +@@ -5,6 +5,7 @@ + #include <sys/mount.h> + + #include "fd-util.h" ++#include "fileio.h" + #include "missing_fs.h" + #include "missing_magic.h" + #include "namespace-util.h" +@@ -183,3 +184,43 @@ int detach_mount_namespace(void) { + + return 0; + } ++ ++int userns_acquire(const char *uid_map, const char *gid_map) { ++ char pathSTRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1; ++ _cleanup_(sigkill_waitp) pid_t pid = 0; ++ _cleanup_close_ int userns_fd = -1; ++ int r; ++ ++ assert(uid_map); ++ assert(gid_map); ++ ++ /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it, ++ * and then kills the process again. This way we have a userns fd that is not bound to any ++ * process. We can use that for file system mounts and similar. */ ++ ++ r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid); ++ if (r < 0) ++ return r; ++ if (r == 0) { ++ /* Child. We do nothing here, just freeze until somebody kills us. 
*/ ++ freeze(); ++ _exit(EXIT_FAILURE); ++ } ++ ++ xsprintf(path, "/proc/" PID_FMT "/uid_map", pid); ++ r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER); ++ if (r < 0) ++ return log_error_errno(r, "Failed to write UID map: %m"); ++ ++ xsprintf(path, "/proc/" PID_FMT "/gid_map", pid); ++ r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER); ++ if (r < 0) ++ return log_error_errno(r, "Failed to write GID map: %m"); ++ ++ r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); ++ if (r < 0) ++ return log_error_errno(r, "Failed to open netns fd: %m"); ++ ++ return TAKE_FD(userns_fd); ++ ++} +diff --git a/src/basic/namespace-util.h b/src/basic/namespace-util.h +index 39a6a46..24dce09 100644 +--- a/src/basic/namespace-util.h ++++ b/src/basic/namespace-util.h +@@ -24,3 +24,5 @@ static inline bool userns_shift_range_valid(uid_t shift, uid_t range) { + + return true; + } ++ ++int userns_acquire(const char *uid_map, const char *gid_map); +diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c +index 8ad395c..7a368e7 100644 +--- a/src/shared/mount-util.c ++++ b/src/shared/mount-util.c +@@ -999,39 +999,18 @@ int make_mount_point(const char *path) { + } + + static int make_userns(uid_t uid_shift, uid_t uid_range) { +- char uid_mapSTRLEN("/proc//uid_map") + DECIMAL_STR_MAX(uid_t) + 1, lineDECIMAL_STR_MAX(uid_t)*3+3+1; +- _cleanup_(sigkill_waitp) pid_t pid = 0; ++ char lineDECIMAL_STR_MAX(uid_t)*3+3+1; + _cleanup_close_ int userns_fd = -1; +- int r; + + /* Allocates a userns file descriptor with the mapping we need. For this we'll fork off a child + * process whose only purpose is to give us a new user namespace. It's killed when we got it. */ + +- r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid); +- if (r < 0) +- return r; +- if (r == 0) { +- /* Child. We do nothing here, just freeze until somebody kills us. 
*/ +- freeze(); +- _exit(EXIT_FAILURE); +- } +- + xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range); + +- xsprintf(uid_map, "/proc/" PID_FMT "/uid_map", pid); +- r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER); +- if (r < 0) +- return log_error_errno(r, "Failed to write UID map: %m"); +- + /* We always assign the same UID and GID ranges */ +- xsprintf(uid_map, "/proc/" PID_FMT "/gid_map", pid); +- r = write_string_file(uid_map, line, WRITE_STRING_FILE_DISABLE_BUFFER); +- if (r < 0) +- return log_error_errno(r, "Failed to write GID map: %m"); +- +- r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL); +- if (r < 0) +- return r; ++ userns_fd = userns_acquire(line, line); ++ if (userns_fd < 0) ++ return log_debug_errno(userns_fd, "Failed to acquire new userns: %m"); + + return TAKE_FD(userns_fd); + } +-- +2.33.0 +
View file
_service:tar_scm:backport-nspawn-add-filesystem-id-mapping-support-to-bind-and.patch
Added
@@ -0,0 +1,236 @@ +From c0c8f7180023e7c72bf9dd67f1a82d3ea611d445 Mon Sep 17 00:00:00 2001 +From: Andreas Valder <git@notandy.de> +Date: Sat, 5 Jun 2021 18:39:38 +0200 +Subject: PATCH nspawn: add filesystem id mapping support to --bind and + --bind-ro + +--- + man/systemd-nspawn.xml | 16 +++++++----- + src/nspawn/nspawn-mount.c | 22 ++++++++++++++--- + src/nspawn/nspawn-mount.h | 2 +- + src/nspawn/nspawn.c | 52 +++++++++++++++++++-------------------- + 4 files changed, 55 insertions(+), 37 deletions(-) + +diff --git a/man/systemd-nspawn.xml b/man/systemd-nspawn.xml +index aec0b0e129..9c1cb33c01 100644 +--- a/man/systemd-nspawn.xml ++++ b/man/systemd-nspawn.xml +@@ -1357,17 +1357,21 @@ After=sys-subsystem-net-devices-ens1.device</programlisting> + source path is taken relative to the image's root directory. This permits setting up bind mounts within the + container image. The source path may be specified as empty string, in which case a temporary directory below + the host's <filename>/var/tmp/</filename> directory is used. It is automatically removed when the container is +- shut down. Mount options are comma-separated and currently, only <option>rbind</option> and +- <option>norbind</option> are allowed, controlling whether to create a recursive or a regular bind +- mount. Defaults to "rbind". Backslash escapes are interpreted, so <literal>\:</literal> may be used to embed +- colons in either path. This option may be specified multiple times for creating multiple independent bind +- mount points. The <option>--bind-ro=</option> option creates read-only bind mounts.</para> ++ shut down. The <option>--bind-ro=</option> option creates read-only bind mounts. Backslash escapes are interpreted, ++ so <literal>\:</literal> may be used to embed colons in either path. This option may be specified ++ multiple times for creating multiple independent bind mount points.</para> ++ ++ <para>Mount options are comma-separated. 
<option>rbind</option> and <option>norbind</option> control whether ++ to create a recursive or a regular bind mount. Defaults to "rbind". <option>idmap</option> and <option>noidmap</option> ++ control if the bind mount should use filesystem id mappings. Using this option requires support by the source filesystem ++ for id mappings. Defaults to "noidmap".</para> + + <para>Note that when this option is used in combination with <option>--private-users</option>, the resulting + mount points will be owned by the <constant>nobody</constant> user. That's because the mount and its files and + directories continue to be owned by the relevant host users and groups, which do not exist in the container, + and thus show up under the wildcard UID 65534 (nobody). If such bind mounts are created, it is recommended to +- make them read-only, using <option>--bind-ro=</option>.</para></listitem> ++ make them read-only, using <option>--bind-ro=</option>. Alternatively you can use the "idmap" mount option to ++ map the filesystem ids.</para></listitem> + </varlistentry> + + <varlistentry> +diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c +index 035fe287ee..751e270c6c 100644 +--- a/src/nspawn/nspawn-mount.c ++++ b/src/nspawn/nspawn-mount.c +@@ -672,9 +672,10 @@ int mount_all(const char *dest, + return 0; + } + +-static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts) { ++static int parse_mount_bind_options(const char *options, unsigned long *mount_flags, char **mount_opts, bool *idmapped) { + unsigned long flags = *mount_flags; + char *opts = NULL; ++ bool flag_idmapped = *idmapped; + int r; + + assert(options); +@@ -692,29 +693,35 @@ static int parse_mount_bind_options(const char *options, unsigned long *mount_fl + flags |= MS_REC; + else if (streq(word, "norbind")) + flags &= ~MS_REC; ++ else if (streq(word, "idmap")) ++ flag_idmapped = true; ++ else if (streq(word, "noidmap")) ++ flag_idmapped = false; + else + return 
log_error_errno(SYNTHETIC_ERRNO(EINVAL), + "Invalid bind mount option: %s", word); + } + + *mount_flags = flags; ++ *idmapped = flag_idmapped; + /* in the future mount_opts will hold string options for mount(2) */ + *mount_opts = opts; + + return 0; + } + +-static int mount_bind(const char *dest, CustomMount *m) { ++static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t uid_range) { + _cleanup_free_ char *mount_opts = NULL, *where = NULL; + unsigned long mount_flags = MS_BIND | MS_REC; + struct stat source_st, dest_st; + int r; ++ bool idmapped = false; + + assert(dest); + assert(m); + + if (m->options) { +- r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts); ++ r = parse_mount_bind_options(m->options, &mount_flags, &mount_opts, &idmapped); + if (r < 0) + return r; + } +@@ -767,6 +774,12 @@ static int mount_bind(const char *dest, CustomMount *m) { + return log_error_errno(r, "Read-only bind mount failed: %m"); + } + ++ if (idmapped) { ++ r = remount_idmap(where, uid_shift, uid_range); ++ if (r < 0) ++ return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where); ++ } ++ + return 0; + } + +@@ -906,6 +919,7 @@ int mount_custom( + const char *dest, + CustomMount *mounts, size_t n, + uid_t uid_shift, ++ uid_t uid_range, + const char *selinux_apifs_context, + MountSettingsMask mount_settings) { + int r; +@@ -927,7 +941,7 @@ int mount_custom( + switch (m->type) { + + case CUSTOM_MOUNT_BIND: +- r = mount_bind(dest, m); ++ r = mount_bind(dest, m, uid_shift, uid_range); + break; + + case CUSTOM_MOUNT_TMPFS: +diff --git a/src/nspawn/nspawn-mount.h b/src/nspawn/nspawn-mount.h +index e19b2cc4e1..6bedbf9b3f 100644 +--- a/src/nspawn/nspawn-mount.h ++++ b/src/nspawn/nspawn-mount.h +@@ -58,7 +58,7 @@ int inaccessible_mount_parse(CustomMount **l, size_t *n, const char *s); + int mount_all(const char *dest, MountSettingsMask mount_settings, uid_t uid_shift, const char *selinux_apifs_context); + int mount_sysfs(const char *dest, 
MountSettingsMask mount_settings); + +-int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, const char *selinux_apifs_context, MountSettingsMask mount_settings); ++int mount_custom(const char *dest, CustomMount *mounts, size_t n, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context, MountSettingsMask mount_settings); + bool has_custom_root_mount(const CustomMount *mounts, size_t n); + + int setup_volatile_mode(const char *directory, VolatileMode mode, uid_t uid_shift, const char *selinux_apifs_context); +diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c +index ea68dc248a..8c0bc99d72 100644 +--- a/src/nspawn/nspawn.c ++++ b/src/nspawn/nspawn.c +@@ -3314,6 +3314,7 @@ static int inner_child( + arg_custom_mounts, + arg_n_custom_mounts, + 0, ++ 0, + arg_selinux_apifs_context, + MOUNT_NON_ROOT_ONLY | MOUNT_IN_USERNS); + if (r < 0) +@@ -3719,32 +3720,6 @@ static int outer_child( + directory = "/run/systemd/nspawn-root"; + } + +- if (arg_userns_mode != USER_NAMESPACE_NO && +- IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) && +- arg_uid_shift != 0) { +- r = make_mount_point(directory); +- if (r < 0) +- return r; +- +- r = remount_idmap(directory, arg_uid_shift, arg_uid_range); +- if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) { +- /* This might fail because the kernel or file system doesn't support idmapping. We +- * can't really distinguish this nicely, nor do we have any guarantees about the +- * error codes we see, could be EOPNOTSUPP or EINVAL. 
*/ +- if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO) +- return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), +- "ID mapped mounts are apparently not available, sorry."); +- +- log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing."); +- arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN; +- } else if (r < 0) +- return log_error_errno(r, "Failed to set up ID mapped mounts: %m"); +- else { +- log_debug("ID mapped mounts available, making use of them."); +- idmap = true; +- } +- } +- + r = setup_pivot_root( + directory, + arg_pivot_root_new, +@@ -3795,6 +3770,7 @@ static int outer_child( + arg_custom_mounts, + arg_n_custom_mounts, + arg_uid_shift, ++ arg_uid_range, + arg_selinux_apifs_context, + MOUNT_ROOT_ONLY); + if (r < 0) +@@ -3805,6 +3781,29 @@ static int outer_child( + if (r < 0) + return r; + ++ if (arg_userns_mode != USER_NAMESPACE_NO && ++ IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) && ++ arg_uid_shift != 0) { ++ ++ r = remount_idmap(directory, arg_uid_shift, arg_uid_range); ++ if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) { ++ /* This might fail because the kernel or file system doesn't support idmapping. We ++ * can't really distinguish this nicely, nor do we have any guarantees about the ++ * error codes we see, could be EOPNOTSUPP or EINVAL. 
*/ ++ if (arg_userns_ownership != USER_NAMESPACE_OWNERSHIP_AUTO) ++ return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), ++ "ID mapped mounts are apparently not available, sorry."); ++ ++ log_debug("ID mapped mounts are apparently not available on this kernel or for the selected file system, reverting to recursive chown()ing."); ++ arg_userns_ownership = USER_NAMESPACE_OWNERSHIP_CHOWN; ++ } else if (r < 0) ++ return log_error_errno(r, "Failed to set up ID mapped mounts: %m"); ++ else { ++ log_debug("ID mapped mounts available, making use of them."); ++ idmap = true; ++ } ++ } ++ + if (dissected_image) { + /* Now we know the uid shift, let's now mount everything else that might be in the image. */ + r = dissected_image_mount( +@@ -3915,6 +3914,7 @@ static int outer_child( + arg_custom_mounts, + arg_n_custom_mounts, + arg_uid_shift, ++ arg_uid_range, + arg_selinux_apifs_context, + MOUNT_NON_ROOT_ONLY); + if (r < 0) +-- +2.27.0 +
View file
_service:tar_scm:backport-nspawn-if-we-refuse-to-operate-on-some-directory-exp.patch
Added
@@ -0,0 +1,68 @@ +From aff7ae0d67e0cd4b3c60e043ea9112d3c2972fce Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Thu, 17 Mar 2022 10:31:55 +0100 +Subject: PATCH nspawn: if we refuse to operate on some directory, explain + why + +(Also, some refactoring to use safer path_join()) +--- + src/nspawn/nspawn.c | 31 ++++++++++++++++++------------- + 1 file changed, 18 insertions(+), 13 deletions(-) + +diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c +index 1a654d8817..a185585b50 100644 +--- a/src/nspawn/nspawn.c ++++ b/src/nspawn/nspawn.c +@@ -5612,31 +5612,36 @@ static int run(int argc, char *argv) { + } + + if (arg_start_mode == START_BOOT) { ++ _cleanup_free_ char *b = NULL; + const char *p; + +- if (arg_pivot_root_new) +- p = prefix_roota(arg_directory, arg_pivot_root_new); +- else ++ if (arg_pivot_root_new) { ++ b = path_join(arg_directory, arg_pivot_root_new); ++ if (!b) ++ return log_oom(); ++ ++ p = b; ++ } else + p = arg_directory; + + if (path_is_os_tree(p) <= 0) { +- log_error("Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", p); +- r = -EINVAL; ++ r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), ++ "Directory %s doesn't look like an OS root directory (os-release file is missing). Refusing.", p); + goto finish; + } + } else { +- const char *p, *q; ++ _cleanup_free_ char *p = NULL; + + if (arg_pivot_root_new) +- p = prefix_roota(arg_directory, arg_pivot_root_new); ++ p = path_join(arg_directory, arg_pivot_root_new, "/usr/"); + else +- p = arg_directory; +- +- q = strjoina(p, "/usr/"); ++ p = path_join(arg_directory, "/usr/"); ++ if (!p) ++ return log_oom(); + +- if (laccess(q, F_OK) < 0) { +- log_error("Directory %s doesn't look like it has an OS tree. Refusing.", p); +- r = -EINVAL; ++ if (laccess(p, F_OK) < 0) { ++ r = log_error_errno(SYNTHETIC_ERRNO(EINVAL), ++ "Directory %s doesn't look like it has an OS tree (/usr/ directory is missing). 
Refusing.", arg_directory); + goto finish; + } + } +-- +2.27.0 +
View file
_service:tar_scm:backport-nspawn-make-more-stuff-const.patch
Added
@@ -0,0 +1,26 @@ +From 1eb874b97819983f2e9a3355efbb90e929d39398 Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Thu, 17 Mar 2022 10:02:41 +0100 +Subject: PATCH nspawn: make more stuff const + +And if we make it const, we can also make it static. +--- + src/nspawn/nspawn.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c +index 8e0bc1c077..1a654d8817 100644 +--- a/src/nspawn/nspawn.c ++++ b/src/nspawn/nspawn.c +@@ -3563,7 +3563,7 @@ static int inner_child( + + static int setup_notify_child(void) { + _cleanup_close_ int fd = -1; +- union sockaddr_union sa = { ++ static const union sockaddr_union sa = { + .un.sun_family = AF_UNIX, + .un.sun_path = NSPAWN_NOTIFY_SOCKET_PATH, + }; +-- +2.27.0 +
View file
_service:tar_scm:backport-nspawn-make-sure-host-root-can-write-to-the-uidmappe.patch
Added
@@ -0,0 +1,213 @@ +From 50ae2966d20b0b4a19def060de3b966b7a70b54a Mon Sep 17 00:00:00 2001 +From: Lennart Poettering <lennart@poettering.net> +Date: Thu, 17 Mar 2022 13:46:12 +0100 +Subject: PATCH nspawn: make sure host root can write to the uidmapped mounts + we prepare for the container payload +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When using user namespaces in conjunction with uidmapped mounts, nspawn +so far set up two uidmappings: + +1. One that is used for the uidmapped mount and that maps the UID range + 0…65535 on the backing fs to some high UID range X…X+65535 on the + uidmapped fs. (Let's call this mapping the "mount mapping") + +2. One that is used for the userns namespace the container payload + processes run in, that maps X…X+65535 back to 0…65535. (Let's call + this one the "process mapping"). + +These mappings hence are pretty much identical, one just moves things up +and one back down. (Reminder: we do all this so that the processes can +run under high UIDs while running off file systems that require no +recursive chown()ing, i.e. we want processes with high UID range but +files with low UID range.) + +This creates one problem, i.e. issue #20989: if nspawn (which runs as +host root, i.e. host UID 0) wants to add inodes to the uidmapped mount +it can't do that, since host UID 0 is not defined in the mount mapping +(only the X…X+65536 range is, after all, and X > 0), and processes whose +UID is not mapped in a uidmapped fs cannot create inodes in it since +those would be owned by an unmapped UID, which then triggers +the famous EOVERFLOW error. + +Let's fix this, by explicitly including an entry for the host UID 0 in +the mount mapping. Specifically, we'll extend the mount mapping to map +UID 2147483646 (which is INT32_MAX-1, see code for an explanation why I +picked this one) of the backing fs to UID 0 on the uidmapped fs. 
This +way nspawn can create inodes on the uidmapped fs as it likes (which will +then actually be owned by UID 2147483646 on the backing fs), and as it +always did. Note that we do *not* create a similar entry in the process +mapping. Thus any files created by nspawn that way (and not chown()ed to +something better) will appear as unmapped (i.e. as overflowuid/"nobody") +in the container payload. And that's good. Of course, the latter is +mostly theoretic, as nspawn should generally chown() the inodes it +creates to UID ranges that actually make sense for the container (and we +generally already do this correctly), but it's good to know that we are +safe here, given we might accidentally forget to chown() some inodes we +create. + +Net effect: the two mappings will not be identical anymore. The mount +mapping has one entry more, and the only reason it exists is so that +nspawn can access the uidmapped fs reasonably independently from any +process mapping. + +Fixes: #20989 +--- + src/basic/user-util.h | 13 +++++++++++++ + src/nspawn/nspawn-mount.c | 2 +- + src/nspawn/nspawn.c | 2 +- + src/shared/dissect-image.c | 2 +- + src/shared/mount-util.c | 28 +++++++++++++++++++++++----- + src/shared/mount-util.h | 13 ++++++++++++- + 6 files changed, 51 insertions(+), 9 deletions(-) + +diff --git a/src/basic/user-util.h b/src/basic/user-util.h +index 40979d1080..e1692c4f66 100644 +--- a/src/basic/user-util.h ++++ b/src/basic/user-util.h +@@ -67,6 +67,19 @@ int take_etc_passwd_lock(const char *root); + #define UID_NOBODY ((uid_t) 65534U) + #define GID_NOBODY ((gid_t) 65534U) + ++/* If REMOUNT_IDMAP_HOST_ROOT is set for remount_idmap() we'll include a mapping here that maps the host root ++ * user accessing the idmapped mount to this user ID on the backing fs. This is the last valid UID in the ++ * *signed* 32bit range. You might wonder why precisely use this specific UID for this purpose? 
Well, we ++ * definitely cannot use the first 0…65536 UIDs for that, since in most cases that's precisely the file range ++ * we intend to map to some high UID range, and since UID mappings have to be bijective we thus cannot use ++ * them at all. Furthermore the UID range beyond INT32_MAX (i.e. the range above the signed 32bit range) is ++ * icky, since many APIs cannot use it (example: setfsuid() returns the old UID as signed integer). Following ++ * our usual logic of assigning a 16bit UID range to each container, so that the upper 16bit of a 32bit UID ++ * value indicate kind of a "container ID" and the lower 16bit map directly to the intended user you can read ++ * this specific UID as the "nobody" user of the container with ID 0x7FFF, which is kinda nice. */ ++#define UID_MAPPED_ROOT ((uid_t) (INT32_MAX-1)) ++#define GID_MAPPED_ROOT ((gid_t) (INT32_MAX-1)) ++ + #define ETC_PASSWD_LOCK_PATH "/etc/.pwd.lock" + + /* The following macros add 1 when converting things, since UID 0 is a valid UID, while the pointer +diff --git a/src/nspawn/nspawn-mount.c b/src/nspawn/nspawn-mount.c +index 40773d90c1..f2fad0f462 100644 +--- a/src/nspawn/nspawn-mount.c ++++ b/src/nspawn/nspawn-mount.c +@@ -780,7 +780,7 @@ static int mount_bind(const char *dest, CustomMount *m, uid_t uid_shift, uid_t u + } + + if (idmapped) { +- r = remount_idmap(where, uid_shift, uid_range); ++ r = remount_idmap(where, uid_shift, uid_range, REMOUNT_IDMAP_HOST_ROOT); + if (r < 0) + return log_error_errno(r, "Failed to map ids for bind mount %s: %m", where); + } +diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c +index a185585b50..7fe061899c 100644 +--- a/src/nspawn/nspawn.c ++++ b/src/nspawn/nspawn.c +@@ -3781,7 +3781,7 @@ static int outer_child( + IN_SET(arg_userns_ownership, USER_NAMESPACE_OWNERSHIP_MAP, USER_NAMESPACE_OWNERSHIP_AUTO) && + arg_uid_shift != 0) { + +- r = remount_idmap(directory, arg_uid_shift, arg_uid_range); ++ r = remount_idmap(directory, arg_uid_shift, arg_uid_range, 
REMOUNT_IDMAP_HOST_ROOT); + if (r == -EINVAL || ERRNO_IS_NOT_SUPPORTED(r)) { + /* This might fail because the kernel or file system doesn't support idmapping. We + * can't really distinguish this nicely, nor do we have any guarantees about the +diff --git a/src/shared/dissect-image.c b/src/shared/dissect-image.c +index 14519ead70..14361a0e5a 100644 +--- a/src/shared/dissect-image.c ++++ b/src/shared/dissect-image.c +@@ -1807,7 +1807,7 @@ static int mount_partition( + (void) fs_grow(node, p); + + if (remap_uid_gid) { +- r = remount_idmap(p, uid_shift, uid_range); ++ r = remount_idmap(p, uid_shift, uid_range, REMOUNT_IDMAP_HOST_ROOT); + if (r < 0) + return r; + } +diff --git a/src/shared/mount-util.c b/src/shared/mount-util.c +index 7f88a982bd..ae1f2f0b80 100644 +--- a/src/shared/mount-util.c ++++ b/src/shared/mount-util.c +@@ -1049,14 +1049,31 @@ int make_mount_point(const char *path) { + return 1; + } + +-static int make_userns(uid_t uid_shift, uid_t uid_range) { +- char lineDECIMAL_STR_MAX(uid_t)*3+3+1; ++static int make_userns(uid_t uid_shift, uid_t uid_range, RemountIdmapFlags flags) { + _cleanup_close_ int userns_fd = -1; ++ _cleanup_free_ char *line = NULL; + + /* Allocates a userns file descriptor with the mapping we need. For this we'll fork off a child + * process whose only purpose is to give us a new user namespace. It's killed when we got it. */ + +- xsprintf(line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range); ++ if (asprintf(&line, UID_FMT " " UID_FMT " " UID_FMT "\n", 0, uid_shift, uid_range) < 0) ++ return log_oom_debug(); ++ ++ /* If requested we'll include an entry in the mapping so that the host root user can make changes to ++ * the uidmapped mount like it normally would. Specifically, we'll map the user with UID_MAPPED_ROOT on ++ * the backing fs to UID 0. 
This is useful, since nspawn code wants to create various missing inodes ++ * in the OS tree before booting into it, and this becomes very easy and straightforward to do if it ++ * can just do it under its own regular UID. Note that in that case the container's runtime uidmap ++ * (i.e. the one the container payload processes run in) will leave this UID unmapped, i.e. if we ++ * accidentally leave files owned by host root in the already uidmapped tree around they'll show up ++ * as owned by 'nobody', which is safe. (Of course, we shouldn't leave such inodes around, but always ++ * chown() them to the container's own UID range, but it's good to have a safety net, in case we ++ * forget it.) */ ++ if (flags & REMOUNT_IDMAP_HOST_ROOT) ++ if (strextendf(&line, ++ UID_FMT " " UID_FMT " " UID_FMT "\n", ++ UID_MAPPED_ROOT, 0, 1) < 0) ++ return log_oom_debug(); + + /* We always assign the same UID and GID ranges */ + userns_fd = userns_acquire(line, line); +@@ -1069,7 +1086,8 @@ static int make_userns(uid_t uid_shift, uid_t uid_range) { + int remount_idmap( + const char *p, + uid_t uid_shift, +- uid_t uid_range) { ++ uid_t uid_range, ++ RemountIdmapFlags flags) { + + _cleanup_close_ int mount_fd = -1, userns_fd = -1; + int r; +@@ -1085,7 +1103,7 @@ int remount_idmap( + return log_debug_errno(errno, "Failed to open tree of mounted filesystem '%s': %m", p); + + /* Create a user namespace mapping */ +- userns_fd = make_userns(uid_shift, uid_range); ++ userns_fd = make_userns(uid_shift, uid_range, flags); + if (userns_fd < 0) + return userns_fd; + +diff --git a/src/shared/mount-util.h b/src/shared/mount-util.h +index 9caf92308a..960d586ba9 100644 +--- a/src/shared/mount-util.h ++++ b/src/shared/mount-util.h +@@ -112,7 +112,18 @@ int mount_image_in_namespace(pid_t target, const char *propagate_path, const cha + + int make_mount_point(const char *path); + +-int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range); ++typedef enum RemountIdmapFlags { ++ /* Include a 
mapping from UID_MAPPED_ROOT (i.e. UID 2^31-2) on the backing fs to UID 0 on the ++ * uidmapped fs. This is useful to ensure that the host root user can safely add inodes to the ++ * uidmapped fs (which otherwise wouldn't work as the host root user is not defined on the uidmapped ++ * mount and any attempts to create inodes will then be refused with EOVERFLOW). The idea is that ++ * these inodes are quickly re-chown()ed to more suitable UIDs/GIDs. Any code that intends to be able ++ * to add inodes to file systems mapped this way should set this flag, but given it comes with ++ * certain security implications defaults to off, and requires explicit opt-in. */ ++ REMOUNT_IDMAP_HOST_ROOT = 1 << 0, ++} RemountIdmapFlags; ++ ++int remount_idmap(const char *p, uid_t uid_shift, uid_t uid_range, RemountIdmapFlags flags); + + /* Creates a mount point (not parents) based on the source path or stat - ie, a file or a directory */ + int make_mount_point_inode_from_stat(const struct stat *st, const char *dest, mode_t mode); +-- +2.27.0 +
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2