commit e88b2c6e5a4d9ce30d75391e4d950da74bb2bd90 upstream.
While reviewing a different fix, John and I noticed an oddity in one of the
BPF program dumps that stood out, for example:
# bpftool p d x i 13
0: (b7) r0 = 808464450
1: (b4) w4 = 808464432
2: (bc) w0 = w0
3: (15) if r0 == 0x0 goto pc+1
4: (9c) w4 %= w0
[...]
In line 2 we noticed that the mov32 would 32 bit truncate the original src
register for the div/mod operation. While for the two operations the dst
register is typically marked unknown e.g. from adjust_scalar_min_max_vals()
the src register is not, and thus verifier keeps tracking original bounds,
simplified:
0: R1=ctx(id=0,off=0,imm=0) R10=fp0
0: (b7) r0 = -1
1: R0_w=invP-1 R1=ctx(id=0,off=0,imm=0) R10=fp0
1: (b7) r1 = -1
2: R0_w=invP-1 R1_w=invP-1 R10=fp0
2: (3c) w0 /= w1
3: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP-1 R10=fp0
3: (77) r1 >>= 32
4: R0_w=invP(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R1_w=invP4294967295 R10=fp0
4: (bf) r0 = r1
5: R0_w=invP4294967295 R1_w=invP4294967295 R10=fp0
5: (95) exit
processed 6 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0
Runtime result of r0 at exit is 0 instead of expected -1. Remove the
verifier mov32 src rewrite in div/mod and replace it with a jmp32 test
instead. After the fix, we result in the following code generation when
having dividend r1 and divisor r6:
div, 64 bit: div, 32 bit:
0: (b7) r6 = 8 0: (b7) r6 = 8
1: (b7) r1 = 8 1: (b7) r1 = 8
2: (55) if r6 != 0x0 goto pc+2 2: (56) if w6 != 0x0 goto pc+2
3: (ac) w1 ^= w1 3: (ac) w1 ^= w1
4: (05) goto pc+1 4: (05) goto pc+1
5: (3f) r1 /= r6 5: (3c) w1 /= w6
6: (b7) r0 = 0 6: (b7) r0 = 0
7: (95) exit 7: (95) exit
mod, 64 bit: mod, 32 bit:
0: (b7) r6 = 8 0: (b7) r6 = 8
1: (b7) r1 = 8 1: (b7) r1 = 8
2: (15) if r6 == 0x0 goto pc+1 2: (16) if w6 == 0x0 goto pc+1
3: (9f) r1 %= r6 3: (9c) w1 %= w6
4: (b7) r0 = 0 4: (b7) r0 = 0
5: (95) exit 5: (95) exit
x86 in particular can throw a 'divide error' exception for div
instruction not only for divisor being zero, but also for the case
when the quotient is too large for the designated register. For the
edx:eax and rdx:rax dividend pair it is not an issue in x86 BPF JIT
since we always zero edx (rdx). Hence really the only protection
needed is against divisor being zero.
Also add some other code missed when backporting.
Fixes: 68fda450a7df ("bpf: fix 32-bit divide by zero")
Co-developed-by: John Fastabend <john.fastabend@gmail.com>
Change-Id: I35a7f4f346bbcbc2f01003e607f2b00b7abe92ae
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
197 lines
4.3 KiB
C
197 lines
4.3 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/* tnum: tracked (or tristate) numbers
|
|
*
|
|
* A tnum tracks knowledge about the bits of a value. Each bit can be either
|
|
* known (0 or 1), or unknown (x). Arithmetic operations on tnums will
|
|
* propagate the unknown bits such that the tnum result represents all the
|
|
* possible results for possible values of the operands.
|
|
*/
|
|
#include <linux/kernel.h>
|
|
#include <linux/tnum.h>
|
|
|
|
#define TNUM(_v, _m) (struct tnum){.value = _v, .mask = _m}
|
|
/* A completely unknown value */
|
|
const struct tnum tnum_unknown = { .value = 0, .mask = -1 };
|
|
|
|
struct tnum tnum_const(u64 value)
|
|
{
|
|
return TNUM(value, 0);
|
|
}
|
|
|
|
struct tnum tnum_range(u64 min, u64 max)
|
|
{
|
|
u64 chi = min ^ max, delta;
|
|
u8 bits = fls64(chi);
|
|
|
|
/* special case, needed because 1ULL << 64 is undefined */
|
|
if (bits > 63)
|
|
return tnum_unknown;
|
|
/* e.g. if chi = 4, bits = 3, delta = (1<<3) - 1 = 7.
|
|
* if chi = 0, bits = 0, delta = (1<<0) - 1 = 0, so we return
|
|
* constant min (since min == max).
|
|
*/
|
|
delta = (1ULL << bits) - 1;
|
|
return TNUM(min & ~delta, delta);
|
|
}
|
|
|
|
struct tnum tnum_lshift(struct tnum a, u8 shift)
|
|
{
|
|
return TNUM(a.value << shift, a.mask << shift);
|
|
}
|
|
|
|
struct tnum tnum_rshift(struct tnum a, u8 shift)
|
|
{
|
|
return TNUM(a.value >> shift, a.mask >> shift);
|
|
}
|
|
|
|
struct tnum tnum_arshift(struct tnum a, u8 min_shift, u8 insn_bitness)
|
|
{
|
|
/* if a.value is negative, arithmetic shifting by minimum shift
|
|
* will have larger negative offset compared to more shifting.
|
|
* If a.value is nonnegative, arithmetic shifting by minimum shift
|
|
* will have larger positive offset compare to more shifting.
|
|
*/
|
|
if (insn_bitness == 32)
|
|
return TNUM((u32)(((s32)a.value) >> min_shift),
|
|
(u32)(((s32)a.mask) >> min_shift));
|
|
else
|
|
return TNUM((s64)a.value >> min_shift,
|
|
(s64)a.mask >> min_shift);
|
|
}
|
|
|
|
struct tnum tnum_add(struct tnum a, struct tnum b)
|
|
{
|
|
u64 sm, sv, sigma, chi, mu;
|
|
|
|
sm = a.mask + b.mask;
|
|
sv = a.value + b.value;
|
|
sigma = sm + sv;
|
|
chi = sigma ^ sv;
|
|
mu = chi | a.mask | b.mask;
|
|
return TNUM(sv & ~mu, mu);
|
|
}
|
|
|
|
struct tnum tnum_sub(struct tnum a, struct tnum b)
|
|
{
|
|
u64 dv, alpha, beta, chi, mu;
|
|
|
|
dv = a.value - b.value;
|
|
alpha = dv + a.mask;
|
|
beta = dv - b.mask;
|
|
chi = alpha ^ beta;
|
|
mu = chi | a.mask | b.mask;
|
|
return TNUM(dv & ~mu, mu);
|
|
}
|
|
|
|
struct tnum tnum_and(struct tnum a, struct tnum b)
|
|
{
|
|
u64 alpha, beta, v;
|
|
|
|
alpha = a.value | a.mask;
|
|
beta = b.value | b.mask;
|
|
v = a.value & b.value;
|
|
return TNUM(v, alpha & beta & ~v);
|
|
}
|
|
|
|
struct tnum tnum_or(struct tnum a, struct tnum b)
|
|
{
|
|
u64 v, mu;
|
|
|
|
v = a.value | b.value;
|
|
mu = a.mask | b.mask;
|
|
return TNUM(v, mu & ~v);
|
|
}
|
|
|
|
struct tnum tnum_xor(struct tnum a, struct tnum b)
|
|
{
|
|
u64 v, mu;
|
|
|
|
v = a.value ^ b.value;
|
|
mu = a.mask | b.mask;
|
|
return TNUM(v & ~mu, mu);
|
|
}
|
|
|
|
/* half-multiply add: acc += (unknown * mask * value).
|
|
* An intermediate step in the multiply algorithm.
|
|
*/
|
|
static struct tnum hma(struct tnum acc, u64 value, u64 mask)
|
|
{
|
|
while (mask) {
|
|
if (mask & 1)
|
|
acc = tnum_add(acc, TNUM(0, value));
|
|
mask >>= 1;
|
|
value <<= 1;
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
struct tnum tnum_mul(struct tnum a, struct tnum b)
|
|
{
|
|
struct tnum acc;
|
|
u64 pi;
|
|
|
|
pi = a.value * b.value;
|
|
acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
|
|
return hma(acc, b.mask, a.value);
|
|
}
|
|
|
|
/* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
|
|
* a 'known 0' - this will return a 'known 1' for that bit.
|
|
*/
|
|
struct tnum tnum_intersect(struct tnum a, struct tnum b)
|
|
{
|
|
u64 v, mu;
|
|
|
|
v = a.value | b.value;
|
|
mu = a.mask & b.mask;
|
|
return TNUM(v & ~mu, mu);
|
|
}
|
|
|
|
struct tnum tnum_cast(struct tnum a, u8 size)
|
|
{
|
|
a.value &= (1ULL << (size * 8)) - 1;
|
|
a.mask &= (1ULL << (size * 8)) - 1;
|
|
return a;
|
|
}
|
|
|
|
bool tnum_is_aligned(struct tnum a, u64 size)
|
|
{
|
|
if (!size)
|
|
return true;
|
|
return !((a.value | a.mask) & (size - 1));
|
|
}
|
|
|
|
bool tnum_in(struct tnum a, struct tnum b)
|
|
{
|
|
if (b.mask & ~a.mask)
|
|
return false;
|
|
b.value &= ~a.mask;
|
|
return a.value == b.value;
|
|
}
|
|
|
|
int tnum_strn(char *str, size_t size, struct tnum a)
|
|
{
|
|
return snprintf(str, size, "(%#llx; %#llx)", a.value, a.mask);
|
|
}
|
|
EXPORT_SYMBOL_GPL(tnum_strn);
|
|
|
|
int tnum_sbin(char *str, size_t size, struct tnum a)
|
|
{
|
|
size_t n;
|
|
|
|
for (n = 64; n; n--) {
|
|
if (n < size) {
|
|
if (a.mask & 1)
|
|
str[n - 1] = 'x';
|
|
else if (a.value & 1)
|
|
str[n - 1] = '1';
|
|
else
|
|
str[n - 1] = '0';
|
|
}
|
|
a.mask >>= 1;
|
|
a.value >>= 1;
|
|
}
|
|
str[min(size - 1, (size_t)64)] = 0;
|
|
return 64;
|
|
}
|