commit 85b6d24646e4125c591639841169baa98a2da503 upstream.
Currently, the exit_shm() function is not designed to work properly when
task->sysvshm.shm_clist holds shm objects from different IPC namespaces.
This is a real pain when sysctl kernel.shm_rmid_forced = 1, because it
leads to a use-after-free (a reproducer exists).
This is an attempt to fix the problem by extending the exit_shm() mechanism
to handle the destruction of shm objects from several IPC namespaces.
To achieve that we do several things:
1. add a namespace (non-refcounted) pointer to struct shmid_kernel
2. during new shm object creation (newseg()/shmget syscall) we
initialize this pointer with the current task's IPC ns
3. exit_shm() is fully reworked so that it traverses all shp's in
task->sysvshm.shm_clist and gets the IPC namespace not from the current
task, as it did before, but from the shp object itself, and then calls
shm_destroy(shp, ns).
Note: We need to be really careful here because, as stated in (1), our
pointer to the IPC ns is not refcounted. To be on the safe side we use
the special helper get_ipc_ns_not_zero(), which takes a reference on the
IPC ns only if the IPC ns is not in the "state of destruction".
Q/A
Q: Why can we access shp->ns memory using non-refcounted pointer?
A: Because the shp object's lifetime is always shorter than the IPC
namespace's lifetime, so if we get the shp object from
task->sysvshm.shm_clist while holding task_lock(task), nobody can steal
our namespace.
Q: Does this patch change semantics of unshare/setns/clone syscalls?
A: No. It just fixes a previously uncovered case where a process may leave
an IPC namespace without getting its task->sysvshm.shm_clist list cleaned up.
Link: https://lkml.kernel.org/r/67bb03e5-f79c-1815-e2bf-949c67047418@colorfullife.com
Link: https://lkml.kernel.org/r/20211109151501.4921-1-manfred@colorfullife.com
Fixes: ab602f7991 ("shm: make exit_shm work proportional to task activity")
Co-developed-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Alexander Mikhalitsyn <alexander.mikhalitsyn@virtuozzo.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
91 lines
2.2 KiB
C
91 lines
2.2 KiB
C
#ifndef _LINUX_SHM_H_
|
|
#define _LINUX_SHM_H_
|
|
|
|
#include <linux/list.h>
|
|
#include <asm/page.h>
|
|
#include <uapi/linux/shm.h>
|
|
#include <asm/shmparam.h>
|
|
|
|
struct shmid_kernel /* private to the kernel */
|
|
{
|
|
struct kern_ipc_perm shm_perm;
|
|
struct file *shm_file;
|
|
unsigned long shm_nattch;
|
|
unsigned long shm_segsz;
|
|
time_t shm_atim;
|
|
time_t shm_dtim;
|
|
time_t shm_ctim;
|
|
pid_t shm_cprid;
|
|
pid_t shm_lprid;
|
|
struct user_struct *mlock_user;
|
|
|
|
/*
|
|
* The task created the shm object, for
|
|
* task_lock(shp->shm_creator)
|
|
*/
|
|
struct task_struct *shm_creator;
|
|
|
|
/*
|
|
* List by creator. task_lock(->shm_creator) required for read/write.
|
|
* If list_empty(), then the creator is dead already.
|
|
*/
|
|
struct list_head shm_clist;
|
|
struct ipc_namespace *ns;
|
|
};
|
|
|
|
/* shm_mode upper byte flags */
#define SHM_DEST	01000	/* segment will be destroyed on last detach */
#define SHM_LOCKED	02000	/* segment will not be swapped */
#define SHM_HUGETLB	04000	/* segment will use huge TLB pages */
#define SHM_NORESERVE	010000	/* don't check for reservations */
|
|
|
|
/* Bits [26:31] are reserved */

/*
 * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
 * This gives us 6 bits, which is enough until someone invents 128 bit address
 * spaces.
 *
 * Assume these are all powers of two.
 * When 0 use the default page size.
 */
#define SHM_HUGE_SHIFT	26
#define SHM_HUGE_MASK	0x3f
#define SHM_HUGE_2MB	(21 << SHM_HUGE_SHIFT)	/* log2(2 MiB) == 21 */
#define SHM_HUGE_1GB	(30 << SHM_HUGE_SHIFT)	/* log2(1 GiB) == 30 */
|
|
|
|
#ifdef CONFIG_SYSVIPC
|
|
struct sysv_shm {
|
|
struct list_head shm_clist;
|
|
};
|
|
|
|
/* Entry points available when CONFIG_SYSVIPC is enabled. */
long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr,
	      unsigned long shmlba);
bool is_file_shm_hugepages(struct file *file);

/*
 * Walks task->sysvshm.shm_clist; each shp is handled using the IPC
 * namespace stored in the shp itself, not the task's current namespace.
 */
void exit_shm(struct task_struct *task);

/* Start a task with an empty list of shm objects. */
#define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist)
|
|
#else
|
|
/* No per-task shm state is kept when CONFIG_SYSVIPC is disabled. */
struct sysv_shm {
	/* empty */
};
|
|
|
|
/* !CONFIG_SYSVIPC stub: attaching is not implemented, always -ENOSYS. */
static inline long do_shmat(int shmid, char __user *shmaddr,
			    int shmflg, unsigned long *addr,
			    unsigned long shmlba)
{
	return -ENOSYS;
}
|
|
/* !CONFIG_SYSVIPC stub: always reports false, whatever file is passed. */
static inline bool is_file_shm_hugepages(struct file *file)
{
	return false;
}
|
|
/* !CONFIG_SYSVIPC stub: a task has no shm list, so there is nothing to do. */
static inline void exit_shm(struct task_struct *task)
{
}
|
|
/* !CONFIG_SYSVIPC stub: struct sysv_shm is empty, nothing to initialize. */
static inline void shm_init_task(struct task_struct *task)
{
}
|
|
#endif
|
|
|
|
#endif /* _LINUX_SHM_H_ */
|