alloc_fdtable(): change calling conventions.

[ Upstream commit 1d3b4bec3ce55e0c46cdce7d0402dbd6b4af3a3d ]

First of all, tell it how many slots do we want, not which slot
is wanted.  It makes one caller (dup_fd()) more straightforward
and doesn't harm another (expand_fdtable()).

Furthermore, make it return ERR_PTR() on failure rather than
returning NULL.  Simplifies the callers.

Simplify the size calculation, while we are at it - note that we
always have slots_wanted greater than BITS_PER_LONG.  What the
rules boil down to is
	* use the smallest power of two large enough to give us
that many slots
	* on 32bit skip 64 and 128 - the minimal capacity we want
there is 256 slots (i.e. 1Kb fd array).
	* on 64bit don't skip anything, the minimal capacity is
128 - and we'll never be asked for 64 or less.  128 slots means
1Kb fd array, again.
	* on 128bit, if that ever happens, don't skip anything -
we'll never be asked for 128 or less, so the fd array allocation
will be at least 2Kb.

Reviewed-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Ulrich Hecht <uli@kernel.org>
This commit is contained in:
Al Viro
2025-08-26 17:27:47 -04:00
committed by Ulrich Hecht
parent 7c61acaaf7
commit 7c1742631e

View File

@@ -78,9 +78,17 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt));
}
static struct fdtable * alloc_fdtable(unsigned int nr)
/*
* Note how the fdtable bitmap allocations very much have to be a multiple of
* BITS_PER_LONG. This is not only because we walk those things in chunks of
* 'unsigned long' in some places, but simply because that is how the Linux
* kernel bitmaps are defined to work: they are not "bits in an array of bytes",
* they are very much "bits in an array of unsigned long".
*/
static struct fdtable *alloc_fdtable(unsigned int slots_wanted)
{
struct fdtable *fdt;
unsigned int nr;
void *data;
/*
@@ -88,21 +96,32 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
* Allocation steps are keyed to the size of the fdarray, since it
* grows far faster than any of the other dynamic data. We try to fit
* the fdarray into comfortable page-tuned chunks: starting at 1024B
* and growing in powers of two from there on.
* and growing in powers of two from there on. Since we called only
* with slots_wanted > BITS_PER_LONG (embedded instance in files->fdtab
* already gives BITS_PER_LONG slots), the above boils down to
* 1. use the smallest power of two large enough to give us that many
* slots.
* 2. on 32bit skip 64 and 128 - the minimal capacity we want there is
* 256 slots (i.e. 1Kb fd array).
* 3. on 64bit don't skip anything, 1Kb fd array means 128 slots there
* and we are never going to be asked for 64 or less.
*/
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
if (IS_ENABLED(CONFIG_32BIT) && slots_wanted < 256)
nr = 256;
else
nr = roundup_pow_of_two(slots_wanted);
/*
* Note that this can drive nr *below* what we had passed if sysctl_nr_open
* had been set lower between the check in expand_files() and here. Deal
* with that in caller, it's cheaper that way.
* had been set lower between the check in expand_files() and here.
*
* We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
* bitmaps handling below becomes unpleasant, to put it mildly...
*/
if (unlikely(nr > sysctl_nr_open))
nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
if (unlikely(nr > sysctl_nr_open)) {
nr = round_down(sysctl_nr_open, BITS_PER_LONG);
if (nr < slots_wanted)
return ERR_PTR(-EMFILE);
}
/*
* Check if the allocation size would exceed INT_MAX. kvmalloc_array()
@@ -146,7 +165,7 @@ out_arr:
out_fdt:
kfree(fdt);
out:
return NULL;
return ERR_PTR(-ENOMEM);
}
/*
@@ -163,7 +182,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
struct fdtable *new_fdt, *cur_fdt;
spin_unlock(&files->file_lock);
new_fdt = alloc_fdtable(nr);
new_fdt = alloc_fdtable(nr + 1);
/* make sure all __fd_install() have seen resize_in_progress
* or have finished their rcu_read_lock_sched() section.
@@ -172,16 +191,8 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr)
synchronize_sched();
spin_lock(&files->file_lock);
if (!new_fdt)
return -ENOMEM;
/*
* extremely unlikely race - sysctl_nr_open decreased between the check in
* caller and alloc_fdtable(). Cheaper to catch it here...
*/
if (unlikely(new_fdt->max_fds <= nr)) {
__free_fdtable(new_fdt);
return -EMFILE;
}
if (IS_ERR(new_fdt))
return PTR_ERR(new_fdt);
cur_fdt = files_fdtable(files);
BUG_ON(nr < cur_fdt->max_fds);
copy_fdtable(new_fdt, cur_fdt);
@@ -318,16 +329,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
if (new_fdt != &newf->fdtab)
__free_fdtable(new_fdt);
new_fdt = alloc_fdtable(open_files - 1);
if (!new_fdt) {
*errorp = -ENOMEM;
goto out_release;
}
/* beyond sysctl_nr_open; nothing to do */
if (unlikely(new_fdt->max_fds < open_files)) {
__free_fdtable(new_fdt);
*errorp = -EMFILE;
new_fdt = alloc_fdtable(open_files);
if (IS_ERR(new_fdt)) {
*errorp = PTR_ERR(new_fdt);
goto out_release;
}