Squashed commit of the following: commit 259593385c05a430c4685b611c0e43b4272c22f8 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 08:30:37 2024 -0500 bpf: squash revert spoofing and some backports: Squashed commit of the following: commit 8ac5df9c8bc9575059fff6cea0c40463b96fc129 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:58:17 2024 -0500 Revert "BACKPORT: bpf: add skb_load_bytes_relative helper" This reverts commit 029893dcc5d67af16fdf0723bacaae37ec567f67. commit dbcbceafe848744ec188f74e87e9717916d359ea Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:58:13 2024 -0500 Revert "BACKPORT: bpf: encapsulate verifier log state into a structure" This reverts commit d861145b97d247cbd9fe1400df52155f48639126. commit 478f4dfee0406b54525e68764cc9ba48af1624fc Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:58:10 2024 -0500 Revert "BACKPORT: bpf: Rename bpf_verifer_log" This reverts commit 5d088635de1bf2d6ae9ea94e3dd1c601d30c0cce. commit 7bc7c24beb82168b49337530cb56b5dfeeafe19a Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:58:07 2024 -0500 Revert "BACKPORT: bpf: btf: Introduce BPF Type Format (BTF)" This reverts commit 93d34e26514b4d9d15fd176706f57634b2e97485. commit 7106457ba90a459b6241fdd44df658c1b52c0e4b Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:58:03 2024 -0500 Revert "bpf: Update logging functions to work with BTF" This reverts commit 97e6c528eb2f76c58a3b6a4c1e7fbeafcd97633a. commit 08e68c7ba56f5e78fd1afcd5a2164716a75b0fe3 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:58:00 2024 -0500 Revert "bpf: btf: Validate type reference" This reverts commit c7b7eecbc1134e5d8865af2cc0692fc7156175d5. commit 7763cf0831970a64ed62f9b7362fca02ab6e83f1 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:51 2024 -0500 Revert "bpf: btf: Check members of struct/union" This reverts commit 9a77b51cad6f04866ca067ca0e70a89b9f59ed56. 
commit eb033235f666b5f66995f4cf89702de7ab4721f8 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:47 2024 -0500 Revert "bpf: btf: Add pretty print capability for data with BTF type info" This reverts commit 745692103435221d6e39bc177811769995540525. commit c32995674ace91e06c591d2f63177585e81adc75 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:43 2024 -0500 Revert "BACKPORT: bpf: btf: Add BPF_BTF_LOAD command" This reverts commit 4e0afd38e20e5aa2df444361309bc07251ca6b2a. commit 1310bc8d4aca0015c8723e7624121eddf76b3244 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:38 2024 -0500 Revert "bpf: btf: Add BPF_OBJ_GET_INFO_BY_FD support to BTF fd" This reverts commit d4b5d76d9101b97e6fe5181bcefe7f601ed19926. commit 881a49445608712bdb0a0f0c959838bdbc725f62 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:34 2024 -0500 Revert "BACKPORT: bpf: btf: Clean up btf.h in uapi" This reverts commit 26b661822933d41b3feb59bb284334bfbbc82af4. commit e2109fd858ebd5fe392c8bf579b9350fbca35a35 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:29 2024 -0500 Revert "bpf: btf: Avoid WARN_ON when CONFIG_REFCOUNT_FULL=y" This reverts commit 9abf878903404e649fef4ad0b189eec1c13d29fe. commit 088a7d9137f03da4e0fc1d72add3901823081ccd Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:23 2024 -0500 Revert "bpf: Fix compiler warning on info.map_ids for 32bit platform" This reverts commit a3a278e1f6cf167d538ac52f4ad60bb9cf8d4129. commit 6e14aed6b63f2b266982454d83678445c062cf39 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:13 2024 -0500 Revert "bpf: btf: Change how section is supported in btf_header" This reverts commit 4b60ffd683eb623a184b46761777838d7c49e707. 
commit 151a60855c23bf0317734031481d779efb369d6c Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:08 2024 -0500 Revert "bpf: btf: Check array->index_type" This reverts commit b00e10f1a073fadce178b6fb62496722e16db303. commit 49775e9074a54ac5f60f518e6fc5a26172996eae Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:57:01 2024 -0500 Revert "bpf: btf: Remove unused bits from uapi/linux/btf.h" This reverts commit c90c6ad34f7a8f565f351d21c2d5b9706838767d. commit b6d6c6ab28e4b018da6ce9e64125e63f4191d3d9 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:58 2024 -0500 Revert "bpf: btf: Avoid variable length array" This reverts commit fe7d1f7750242e77a73839d173ac36c3e39d4171. commit a45bedecb9b1175fef96f2d64fba2d61777dbf35 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:49 2024 -0500 Revert "bpf: btf: avoid -Wreturn-type warning" This reverts commit 78214f1e390bf1d69d9ae4ee80072ac85c34619e. commit 445efb8465b9fa5706d81098417f15656265322e Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:46 2024 -0500 Revert "bpf: btf: Check array t->size" This reverts commit aed532e7466f77885a362e4b863bf90c41e834ba. commit 8aada590d525de735cf39196d88722e727c141e9 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:42 2024 -0500 Revert "bpf: btf: Ensure t->type == 0 for BTF_KIND_FWD" This reverts commit 8c8b601dcc2e62e1276b73dfee8b49e40fb65944. commit ed67ad09e866c9c30897488088bbb4555ea3dc80 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:38 2024 -0500 Revert "bpf: btf: Fix bitfield extraction for big endian" This reverts commit b0696a226c52868d64963f01665dd1a640a92f2b. commit 5cc64db782daf86cdf7ac77133ca94181bb29146 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:35 2024 -0500 Revert "bpf: btf: Clean up BTF_INT_BITS() in uapi btf.h" This reverts commit 0f008594540b09c667ea88fc87cf289b8db334da. 
commit 3a5c6b9010426449c08ecdcc10e758431b1e515f Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:31 2024 -0500 Revert "bpf: btf: Ensure the member->offset is in the right order" This reverts commit c5e361ecd6d45a7cdbffda02e4691a7a37198bdd. commit bd6173c1ac458b08d6cedaf06e6e53c93e6b0cc5 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:26 2024 -0500 Revert "bpf: fix bpf_skb_load_bytes_relative pkt length check" This reverts commit 9ea14969874cd7896588df435c890f6f2f547821. commit 0b61d26b25a65d9ded4611426c6da9c78e41567c Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:22 2024 -0500 Revert "bpf: btf: Fix end boundary calculation for type section" This reverts commit 08ef221c7fb604cb60c490fa999ec7254d492f05. commit 72fb2b9bb5b90f60ab71915fe4e57eeee3308163 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:18 2024 -0500 Revert "bpf: btf: Fix a missing check bug" This reverts commit 594687e3e01e26086f3b0173e5eda9b9f0b672f8. commit 575a34ceba4013ad0230038f29f6ea0b3ba41a7e Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:15 2024 -0500 Revert "bpf, btf: fix a missing check bug in btf_parse" This reverts commit 6bf31bbc438663756e92fb0aad4f5a35fd730fb0. commit bcca98c0bc5e19b38af3ddcd0feee80ad26e1f96 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:11 2024 -0500 Revert "bpf: fix BTF limits" This reverts commit e351b26ae671dfacd82f27c1c5f66cf8089d930d. commit f71c484e340041d8828c94b39a233ea587d8cc09 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:07 2024 -0500 Revert "bpf/btf: Fix BTF verification of enum members in struct/union" This reverts commit 861e65b744c171d59850e61a01715f194f25e45c. 
commit eca310722a2624d33cd49884aa18c36d435b10f8 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:56:02 2024 -0500 Revert "bpf: btf: fix truncated last_member_type_id in btf_struct_resolve" This reverts commit d6cd1eac41b10e606ec7f445162a0617c01be973. commit caae5c99a3ca7bed0e318b31b6aa7ca8260a1c52 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:58 2024 -0500 Revert "BACKPORT: net: bpf: rename ndo_xdp to ndo_bpf" This reverts commit 2a1ddcb6a384745195d57b4e4cdda2a55d2cbe47. commit f90bdcdaa095a4f10268bb740470a3e0893be21b Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:54 2024 -0500 Revert "BACKPORT: bpf: offload: add infrastructure for loading programs for a specific netdev" This reverts commit a9516d402726094eafccce26a99cf5110d188be9. commit c6e0ce9019c06d9a45c030a2bc38eed320afd45a Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:50 2024 -0500 Revert "bpf: offload: rename the ifindex field" This reverts commit 36bc9c7351a1dc78b3e71571998af381e876b4cb. commit 88b6a4d41b69df804b846a8ebdca410517e08343 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:46 2024 -0500 Revert "BACKPORT: bpf: Check attach type at prog load time" This reverts commit fe5a0d514e4970d86983458136d4a2f6caeee365. commit 9ccfaa66a5ea042331f0aacdb3667e23c8ed363e Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:43 2024 -0500 Revert "BACKPORT: bpf: introduce BPF_PROG_QUERY command" This reverts commit a5720688858170f1054f9549b5a628db1c252a88. commit adab2743b3fa0853d0351b33b0a286de745025e5 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:37 2024 -0500 Revert "BACKPORT: bpf: Hooks for sys_bind" This reverts commit e484887c7e7aa026521ddc1773233368a6304b24. 
commit d462e09db98ad89b3a836f9b9a925812b0d8cfe7 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:33 2024 -0500 Revert "BACKPORT: net: Introduce __inet_bind() and __inet6_bind" This reverts commit 41a3131c3e94c28fd084dd6f4358baee3824fd17. commit cdf7f55dc65b4bdf7ecfc924be77c6a039709b3d Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:29 2024 -0500 Revert "BACKPORT: bpf: Hooks for sys_connect" This reverts commit f26fe7233e2885ef489707ab5a5a5dda9f081b80. commit 97685d5058f76ba4ea6dd2db157f4537f3a8953d Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:23 2024 -0500 Revert "BACKPORT: bpf: Post-hooks for sys_bind" This reverts commit 284ac5bc7c70dac338301445e94e1ad40fb40fdb. commit d03d9c05036d3109eae643f473cc5a5ad0a80721 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:19 2024 -0500 Revert "kernel: bpf: devmap: Create __dev_map_alloc_node" This reverts commit db726149fa9abfd1ca9add3e2db6b1524f7e90a3. commit 8c34bcb3e4c6630799764871b4af2e5f9344a371 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:15 2024 -0500 Revert "BACKPORT: xdp: Add devmap_hash map type for looking up devices by hashed index" This reverts commit c4d4e1d201d8433e06b2ac66041d7105095a0204. commit ef277c7b3a08fd59943eb2b47af64afc513de008 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:11 2024 -0500 Revert "BACKPORT: devmap: Allow map lookups from eBPF" This reverts commit 24d196375871c72de0de977de79afede5a7d1780. commit 4fcd87869c55c28ed59bff916d640147601816d2 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:07 2024 -0500 Revert "gen_headers_{arm, arm64}: Add btf.h to the list" This reverts commit 37edfe7c90bac355885ffec3327b338a34619792. 
commit b89560e0b405b58ecc5fc12c15ad4f56147760d6 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:55:03 2024 -0500 Revert "syscall: Fake uname to 4.19 for bpfloader/netd" This reverts commit 186e74af61269602d0c068d98928b1f25e03eba2. commit fd49f8c35eb7875d6810a5a52877ebc59bfd4530 Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:54:59 2024 -0500 Revert "syscall: Fake uname to 4.19 also for netbpfload" This reverts commit 34b9a1ab387d7dc83ede613b2c12b3741ea08edb. commit b853fcf2ff892664d0ff522ca7fd530bc94c023e Author: John Galt <johngaltfirstrun@gmail.com> Date: Fri Dec 13 07:54:53 2024 -0500 Revert "syscall: Increase bpf fake uname to 5.4" This reverts commit 9cdc014e11b410a7f03d8c968a35ee0dd6a28fff. # Conflicts: # net/ipv4/af_inet.c # net/ipv6/af_inet6.c commit 4a0143fa36d300485650dc447b580151a69a3be2 Author: kondors1995 <normandija1945@gmail.com> Date: Wed Dec 18 13:48:16 2024 +0200 Revert "syscall: Fake uname to 4.19 for bpfloader/netd" This reverts commit 417f37c97f. commit 6f512c5c7341a51d7bbc9cdd93814764cae8868f Author: kondors1995 <normandija1945@gmail.com> Date: Wed Dec 18 13:48:16 2024 +0200 Revert "syscall: Fake uname to 4.19 also for netbpfload" This reverts commit a4c61c3d97. commit 41f326616251f0122d81e518082ef7faaad4b2e5 Author: kondors1995 <normandija1945@gmail.com> Date: Wed Dec 18 13:48:15 2024 +0200 Revert "syscall: Increase bpf fake uname to 5.4" This reverts commit 4a906017d4. commit a0d3db72a836096cf533516d56c81a43150976ed Author: kondors1995 <normandija1945@gmail.com> Date: Wed Dec 18 13:46:12 2024 +0200 Revert "bpf: Hooks for sys_sendmsg" This reverts commit 735c155332. commit 246eb3d90b95e0ab5aee8d5a9e9cd639c7beb174 Author: kondors1995 <normandija1945@gmail.com> Date: Wed Dec 18 13:45:08 2024 +0200 Revert "syscall: Increase fake uname to 6.6.40" This reverts commit 92494b9920.
commit c56eaa5b7f170f58f2ade14bb71aaad2964b9018 Author: kondors1995 <normandija1945@gmail.com> Date: Mon Dec 9 21:35:20 2024 +0200 raphael_defconfig: increase sbalance polling rate to 10s commit 54d190b8af Author: Sultan Alsawaf <sultan@kerneltoast.com> Date: Wed Dec 4 15:53:22 2024 -0800 sbalance: Fix severe misattribution of movable IRQs to the last active CPU Due to a horrible omission in the big IRQ list traversal, all movable IRQs are misattributed to the last active CPU in the system since that's what `bd` is last set to in the loop prior. This horribly breaks SBalance's notion of balance, producing nonsensical balancing decisions and failing to balance IRQs even when they are heavily imbalanced. Fix the massive breakage by adding the missing line of code to set `bd` to the CPU an IRQ actually belongs to, so that it's added to the correct CPU's movable IRQs list. Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com> commit f2fa2db581 Author: Sultan Alsawaf <sultan@kerneltoast.com> Date: Wed Dec 4 14:31:52 2024 -0800 sbalance: Don't race with CPU hotplug When a CPU is hotplugged, cpu_active_mask is modified without any RCU synchronization. As a result, the only synchronization for cpu_active_mask provided by the hotplug code is the CPU hotplug lock. Furthermore, since IRQ balance is majorly disrupted during CPU hotplug due to mass IRQ migration off a dying CPU, SBalance just shouldn't operate while a CPU hotplug is in progress. Take the CPU hotplug lock in balance_irqs() to prevent races and mishaps during CPU hotplugs. Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com> commit a4e81ff60a Author: Sultan Alsawaf <sultan@kerneltoast.com> Date: Wed Dec 4 14:16:48 2024 -0800 sbalance: Convert various IRQ counter types to unsigned ints These counted values are actually unsigned ints, not unsigned longs. Convert them to unsigned ints since there's no reason for them to be longs. Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
1780 lines
40 KiB
C
1780 lines
40 KiB
C
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of version 2 of the GNU General Public
|
|
* License as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*/
|
|
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched/signal.h>
#include <linux/vmalloc.h>
#include <linux/mmzone.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
|
|
|
|
/*
 * Predicates on map->map_type for the "fd" map families.  In this file they
 * select a u32 value size and the bpf_fd_*_lookup_elem() helpers on lookup.
 */
#define IS_FD_ARRAY(map)						\
	((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY ||			\
	 (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||		\
	 (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||		\
	 (map)->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
#define IS_FD_HASH(map)	((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map)	(IS_FD_ARRAY(map) || IS_FD_HASH(map))
|
|
|
|
#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
|
|
|
|
DEFINE_PER_CPU(int, bpf_prog_active);
|
|
static DEFINE_IDR(prog_idr);
|
|
static DEFINE_SPINLOCK(prog_idr_lock);
|
|
static DEFINE_IDR(map_idr);
|
|
static DEFINE_SPINLOCK(map_idr_lock);
|
|
|
|
int sysctl_unprivileged_bpf_disabled __read_mostly =
|
|
IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0;
|
|
|
|
/*
 * Map operations indexed by map type id.  The table is generated from
 * <linux/bpf_types.h>: every BPF_MAP_TYPE() entry becomes a designated
 * initializer, while BPF_PROG_TYPE() entries expand to nothing here.
 */
static const struct bpf_map_ops * const bpf_map_types[] = {
#define BPF_PROG_TYPE(_id, _ops)
#define BPF_MAP_TYPE(_id, _ops)	[_id] = &_ops,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
|
|
|
|
/*
|
|
* If we're handed a bigger struct than we know of, ensure all the unknown bits
|
|
* are 0 - i.e. new user-space does not rely on any kernel feature extensions
|
|
* we don't know about yet.
|
|
*
|
|
* There is a ToCToU between this function call and the following
|
|
* copy_from_user() call. However, this is not a concern since this function is
|
|
* meant to be a future-proofing of bits.
|
|
*/
|
|
static int check_uarg_tail_zero(void __user *uaddr,
|
|
size_t expected_size,
|
|
size_t actual_size)
|
|
{
|
|
unsigned char __user *addr;
|
|
unsigned char __user *end;
|
|
unsigned char val;
|
|
int err;
|
|
|
|
if (unlikely(actual_size > PAGE_SIZE)) /* silly large */
|
|
return -E2BIG;
|
|
|
|
if (unlikely(!access_ok(VERIFY_READ, uaddr, actual_size)))
|
|
return -EFAULT;
|
|
|
|
if (actual_size <= expected_size)
|
|
return 0;
|
|
|
|
addr = uaddr + expected_size;
|
|
end = uaddr + actual_size;
|
|
|
|
for (; addr < end; addr++) {
|
|
err = get_user(val, addr);
|
|
if (err)
|
|
return err;
|
|
if (val)
|
|
return -E2BIG;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
|
|
{
|
|
struct bpf_map *map;
|
|
|
|
if (attr->map_type >= ARRAY_SIZE(bpf_map_types) ||
|
|
!bpf_map_types[attr->map_type])
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
map = bpf_map_types[attr->map_type]->map_alloc(attr);
|
|
if (IS_ERR(map))
|
|
return map;
|
|
map->ops = bpf_map_types[attr->map_type];
|
|
map->map_type = attr->map_type;
|
|
return map;
|
|
}
|
|
|
|
/*
 * Allocate a zeroed area of @size bytes on @numa_node.  Requests of at most
 * PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER bytes are first tried with
 * kmalloc_node(); everything else, or a failed kmalloc, uses vmalloc.
 * The result is released with bpf_map_area_free().
 */
void *bpf_map_area_alloc(size_t size, int numa_node)
{
	/* __GFP_NORETRY is required: under memory pressure we want the
	 * allocation to fail rather than have the OOM killer triggered.
	 */
	const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO;
	void *area = NULL;

	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
		area = kmalloc_node(size, GFP_USER | flags, numa_node);
	if (area)
		return area;

	return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags,
					   __builtin_return_address(0));
}
|
|
|
|
/* Release an area obtained from bpf_map_area_alloc(), via kvfree(). */
void bpf_map_area_free(void *area)
{
	kvfree(area);
}
|
|
|
|
/*
 * Check, without charging anything, whether @pages more locked pages would
 * fit under the current user's RLIMIT_MEMLOCK.
 *
 * Returns 0 if they would fit, -EPERM otherwise.
 */
int bpf_map_precharge_memlock(u32 pages)
{
	struct user_struct *user = get_current_user();
	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long locked = atomic_long_read(&user->locked_vm);

	free_uid(user);

	return (locked + pages > limit) ? -EPERM : 0;
}
|
|
|
|
static int bpf_map_charge_memlock(struct bpf_map *map)
|
|
{
|
|
struct user_struct *user = get_current_user();
|
|
unsigned long memlock_limit;
|
|
|
|
memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
|
|
|
|
atomic_long_add(map->pages, &user->locked_vm);
|
|
|
|
if (atomic_long_read(&user->locked_vm) > memlock_limit) {
|
|
atomic_long_sub(map->pages, &user->locked_vm);
|
|
free_uid(user);
|
|
return -EPERM;
|
|
}
|
|
map->user = user;
|
|
return 0;
|
|
}
|
|
|
|
static void bpf_map_uncharge_memlock(struct bpf_map *map)
|
|
{
|
|
struct user_struct *user = map->user;
|
|
|
|
atomic_long_sub(map->pages, &user->locked_vm);
|
|
free_uid(user);
|
|
}
|
|
|
|
static int bpf_map_alloc_id(struct bpf_map *map)
|
|
{
|
|
int id;
|
|
|
|
spin_lock_bh(&map_idr_lock);
|
|
id = idr_alloc_cyclic(&map_idr, map, 1, INT_MAX, GFP_ATOMIC);
|
|
if (id > 0)
|
|
map->id = id;
|
|
spin_unlock_bh(&map_idr_lock);
|
|
|
|
if (WARN_ON_ONCE(!id))
|
|
return -ENOSPC;
|
|
|
|
return id > 0 ? 0 : id;
|
|
}
|
|
|
|
/*
 * Remove map->id from map_idr.
 *
 * @do_idr_lock: when true, map_idr_lock is taken (IRQ-safe) around the
 * removal.  When false no lock is taken here and only the sparse
 * __acquire()/__release() annotations are emitted.
 */
static void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
{
	unsigned long irq_flags;

	if (!do_idr_lock) {
		__acquire(&map_idr_lock);
		idr_remove(&map_idr, map->id);
		__release(&map_idr_lock);
		return;
	}

	spin_lock_irqsave(&map_idr_lock, irq_flags);
	idr_remove(&map_idr, map->id);
	spin_unlock_irqrestore(&map_idr_lock, irq_flags);
}
|
|
|
|
/* called from workqueue */
|
|
static void bpf_map_free_deferred(struct work_struct *work)
|
|
{
|
|
struct bpf_map *map = container_of(work, struct bpf_map, work);
|
|
|
|
bpf_map_uncharge_memlock(map);
|
|
security_bpf_map_free(map);
|
|
/* implementation dependent freeing */
|
|
map->ops->map_free(map);
|
|
}
|
|
|
|
static void bpf_map_put_uref(struct bpf_map *map)
|
|
{
|
|
if (atomic_dec_and_test(&map->usercnt)) {
|
|
if (map->ops->map_release_uref)
|
|
map->ops->map_release_uref(map);
|
|
}
|
|
}
|
|
|
|
/* decrement map refcnt and schedule it for freeing via workqueue
|
|
* (unrelying map implementation ops->map_free() might sleep)
|
|
*/
|
|
static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock)
|
|
{
|
|
if (atomic_dec_and_test(&map->refcnt)) {
|
|
/* bpf_map_free_id() must be called first */
|
|
bpf_map_free_id(map, do_idr_lock);
|
|
INIT_WORK(&map->work, bpf_map_free_deferred);
|
|
schedule_work(&map->work);
|
|
}
|
|
}
|
|
|
|
void bpf_map_put(struct bpf_map *map)
|
|
{
|
|
__bpf_map_put(map, true);
|
|
}
|
|
|
|
/* Drop one user reference first, then one plain reference on @map. */
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
|
|
|
|
static int bpf_map_release(struct inode *inode, struct file *filp)
|
|
{
|
|
struct bpf_map *map = filp->private_data;
|
|
|
|
if (map->ops->map_release)
|
|
map->ops->map_release(map, filp);
|
|
|
|
bpf_map_put_with_uref(map);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler: print the map's type, key/value sizes, entry limit,
 * flags and memlock footprint in bytes.  For program arrays that already
 * have an owner, the owner's program type and JIT state are printed too.
 */
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;
	u32 owner_prog_type = 0;
	u32 owner_jited = 0;

	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
		const struct bpf_array *array =
			container_of(map, struct bpf_array, map);

		owner_prog_type = array->owner_prog_type;
		owner_jited = array->owner_jited;
	}

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n"
		   "map_flags:\t%#x\n"
		   "memlock:\t%llu\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries,
		   map->map_flags,
		   map->pages * 1ULL << PAGE_SHIFT);

	/* An owner type of 0 means there is nothing more to report. */
	if (!owner_prog_type)
		return;

	seq_printf(m, "owner_prog_type:\t%u\n", owner_prog_type);
	seq_printf(m, "owner_jited:\t%u\n", owner_jited);
}
#endif
|
|
|
|
/*
 * Placeholder ->read handler that always fails with -EINVAL.  It exists only
 * so that alloc_file() enables FMODE_CAN_READ in f_mode.
 */
static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
			      loff_t *ppos)
{
	return -EINVAL;
}
|
|
|
|
/*
 * Placeholder ->write handler that always fails with -EINVAL.  It exists only
 * so that alloc_file() enables FMODE_CAN_WRITE in f_mode.
 */
static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
			       size_t siz, loff_t *ppos)
{
	return -EINVAL;
}
|
|
|
|
const struct file_operations bpf_map_fops = {
|
|
#ifdef CONFIG_PROC_FS
|
|
.show_fdinfo = bpf_map_show_fdinfo,
|
|
#endif
|
|
.release = bpf_map_release,
|
|
.read = bpf_dummy_read,
|
|
.write = bpf_dummy_write,
|
|
};
|
|
|
|
int bpf_map_new_fd(struct bpf_map *map, int flags)
|
|
{
|
|
int ret;
|
|
|
|
ret = security_bpf_map(map, OPEN_FMODE(flags));
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
|
|
flags | O_CLOEXEC);
|
|
}
|
|
|
|
int bpf_get_file_flag(int flags)
|
|
{
|
|
if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
|
|
return -EINVAL;
|
|
if (flags & BPF_F_RDONLY)
|
|
return O_RDONLY;
|
|
if (flags & BPF_F_WRONLY)
|
|
return O_WRONLY;
|
|
return O_RDWR;
|
|
}
|
|
|
|
/* Helper macro to check that the unused fields of 'union bpf_attr' are zero.
 *
 * Expects a local 'attr' pointer in scope and a CMD##_LAST_FIELD macro
 * naming the last field used by the command.  Evaluates to true when any
 * byte after that field is non-zero.  The whole expansion is parenthesized
 * so that it composes safely with '!' and other operators.
 */
#define CHECK_ATTR(CMD) \
	(memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		    sizeof(attr->CMD##_LAST_FIELD), 0, \
		    sizeof(*attr) - \
		    offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		    sizeof(attr->CMD##_LAST_FIELD)) != NULL)
|
|
|
|
/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
|
|
* Return 0 on success and < 0 on error.
|
|
*/
|
|
static int bpf_obj_name_cpy(char *dst, const char *src)
|
|
{
|
|
const char *end = src + BPF_OBJ_NAME_LEN;
|
|
|
|
memset(dst, 0, BPF_OBJ_NAME_LEN);
|
|
|
|
/* Copy all isalnum() and '_' char */
|
|
while (src < end && *src) {
|
|
if (!isalnum(*src) && *src != '_')
|
|
return -EINVAL;
|
|
*dst++ = *src++;
|
|
}
|
|
|
|
/* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
|
|
if (src == end)
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define BPF_MAP_CREATE_LAST_FIELD map_name
|
|
/* called via syscall */
|
|
static int map_create(union bpf_attr *attr)
|
|
{
|
|
int numa_node = bpf_map_attr_numa_node(attr);
|
|
struct bpf_map *map;
|
|
int f_flags;
|
|
int err;
|
|
|
|
err = CHECK_ATTR(BPF_MAP_CREATE);
|
|
if (err)
|
|
return -EINVAL;
|
|
|
|
f_flags = bpf_get_file_flag(attr->map_flags);
|
|
if (f_flags < 0)
|
|
return f_flags;
|
|
|
|
if (numa_node != NUMA_NO_NODE &&
|
|
((unsigned int)numa_node >= nr_node_ids ||
|
|
!node_online(numa_node)))
|
|
return -EINVAL;
|
|
|
|
/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
|
|
map = find_and_alloc_map(attr);
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
err = bpf_obj_name_cpy(map->name, attr->map_name);
|
|
if (err)
|
|
goto free_map_nouncharge;
|
|
|
|
atomic_set(&map->refcnt, 1);
|
|
atomic_set(&map->usercnt, 1);
|
|
|
|
err = security_bpf_map_alloc(map);
|
|
if (err)
|
|
goto free_map_nouncharge;
|
|
|
|
err = bpf_map_charge_memlock(map);
|
|
if (err)
|
|
goto free_map_sec;
|
|
|
|
err = bpf_map_alloc_id(map);
|
|
if (err)
|
|
goto free_map;
|
|
|
|
err = bpf_map_new_fd(map, f_flags);
|
|
if (err < 0) {
|
|
/* failed to allocate fd.
|
|
* bpf_map_put_with_uref() is needed because the above
|
|
* bpf_map_alloc_id() has published the map
|
|
* to the userspace and the userspace may
|
|
* have refcnt-ed it through BPF_MAP_GET_FD_BY_ID.
|
|
*/
|
|
bpf_map_put_with_uref(map);
|
|
return err;
|
|
}
|
|
|
|
trace_bpf_map_create(map, err);
|
|
return err;
|
|
|
|
free_map:
|
|
bpf_map_uncharge_memlock(map);
|
|
free_map_sec:
|
|
security_bpf_map_free(map);
|
|
free_map_nouncharge:
|
|
map->ops->map_free(map);
|
|
return err;
|
|
}
|
|
|
|
/* if error is returned, fd is released.
|
|
* On success caller should complete fd access with matching fdput()
|
|
*/
|
|
struct bpf_map *__bpf_map_get(struct fd f)
|
|
{
|
|
if (!f.file)
|
|
return ERR_PTR(-EBADF);
|
|
if (f.file->f_op != &bpf_map_fops) {
|
|
fdput(f);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
return f.file->private_data;
|
|
}
|
|
|
|
/* prog's and map's refcnt limit */
|
|
#define BPF_MAX_REFCNT 32768
|
|
|
|
struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref)
|
|
{
|
|
if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) {
|
|
atomic_dec(&map->refcnt);
|
|
return ERR_PTR(-EBUSY);
|
|
}
|
|
if (uref)
|
|
atomic_inc(&map->usercnt);
|
|
return map;
|
|
}
|
|
|
|
/*
 * Look up the map behind user fd @ufd and take both a plain and a user
 * reference on it.
 *
 * Returns the map, or an error pointer from __bpf_map_get() / bpf_map_inc().
 */
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map = __bpf_map_get(f);

	/* On error __bpf_map_get() leaves nothing for us to fdput(). */
	if (IS_ERR(map))
		return map;

	map = bpf_map_inc(map, true);
	fdput(f);

	return map;
}
|
|
|
|
/* map_idr_lock should have been held */
|
|
static struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map,
|
|
bool uref)
|
|
{
|
|
int refold;
|
|
|
|
refold = __atomic_add_unless(&map->refcnt, 1, 0);
|
|
|
|
if (refold >= BPF_MAX_REFCNT) {
|
|
__bpf_map_put(map, false);
|
|
return ERR_PTR(-EBUSY);
|
|
}
|
|
|
|
if (!refold)
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
if (uref)
|
|
atomic_inc(&map->usercnt);
|
|
|
|
return map;
|
|
}
|
|
|
|
/* Weak default; this fallback always reports -ENOTSUPP. */
int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	return -ENOTSUPP;
}
|
|
|
|
/* last field in 'union bpf_attr' used by this command */
|
|
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
|
|
|
|
static int map_lookup_elem(union bpf_attr *attr)
|
|
{
|
|
void __user *ukey = u64_to_user_ptr(attr->key);
|
|
void __user *uvalue = u64_to_user_ptr(attr->value);
|
|
int ufd = attr->map_fd;
|
|
struct bpf_map *map;
|
|
void *key, *value, *ptr;
|
|
u8 key_onstack[SZ_16] __aligned(sizeof(long));
|
|
u8 value_onstack[SZ_64] __aligned(sizeof(long));
|
|
u32 value_size;
|
|
struct fd f;
|
|
int err;
|
|
|
|
if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
|
|
return -EINVAL;
|
|
|
|
f = fdget(ufd);
|
|
map = __bpf_map_get(f);
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
if (!(f.file->f_mode & FMODE_CAN_READ)) {
|
|
err = -EPERM;
|
|
goto err_put;
|
|
}
|
|
|
|
if (map->key_size <= sizeof(key_onstack)) {
|
|
key = key_onstack;
|
|
if (copy_from_user(key, ukey, map->key_size)) {
|
|
err = -EFAULT;
|
|
goto err_put;
|
|
}
|
|
} else {
|
|
key = memdup_user(ukey, map->key_size);
|
|
if (IS_ERR(key)) {
|
|
err = PTR_ERR(key);
|
|
goto err_put;
|
|
}
|
|
}
|
|
|
|
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
|
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
|
|
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
|
|
value_size = round_up(map->value_size, 8) * num_possible_cpus();
|
|
else if (IS_FD_MAP(map))
|
|
value_size = sizeof(u32);
|
|
else
|
|
value_size = map->value_size;
|
|
|
|
err = -ENOMEM;
|
|
if (value_size <= sizeof(value_onstack)) {
|
|
value = value_onstack;
|
|
} else {
|
|
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
|
|
if (!value)
|
|
goto free_key;
|
|
}
|
|
|
|
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
|
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
|
|
err = bpf_percpu_hash_copy(map, key, value);
|
|
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
|
err = bpf_percpu_array_copy(map, key, value);
|
|
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
|
|
err = bpf_stackmap_copy(map, key, value);
|
|
} else if (IS_FD_ARRAY(map)) {
|
|
err = bpf_fd_array_map_lookup_elem(map, key, value);
|
|
} else if (IS_FD_HASH(map)) {
|
|
err = bpf_fd_htab_map_lookup_elem(map, key, value);
|
|
} else {
|
|
rcu_read_lock();
|
|
if (map->ops->map_lookup_elem_sys_only)
|
|
ptr = map->ops->map_lookup_elem_sys_only(map, key);
|
|
else
|
|
ptr = map->ops->map_lookup_elem(map, key);
|
|
if (ptr)
|
|
memcpy(value, ptr, value_size);
|
|
rcu_read_unlock();
|
|
err = ptr ? 0 : -ENOENT;
|
|
}
|
|
|
|
if (err)
|
|
goto free_value;
|
|
|
|
err = -EFAULT;
|
|
if (copy_to_user(uvalue, value, value_size) != 0)
|
|
goto free_value;
|
|
|
|
trace_bpf_map_lookup_elem(map, ufd, key, value);
|
|
err = 0;
|
|
|
|
free_value:
|
|
if (value != value_onstack)
|
|
kfree(value);
|
|
free_key:
|
|
if (key != key_onstack)
|
|
kfree(key);
|
|
err_put:
|
|
fdput(f);
|
|
return err;
|
|
}
|
|
|
|
static void maybe_wait_bpf_programs(struct bpf_map *map)
|
|
{
|
|
/* Wait for any running BPF programs to complete so that
|
|
* userspace, when we return to it, knows that all programs
|
|
* that could be running use the new map value.
|
|
*/
|
|
if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
|
|
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
|
|
synchronize_rcu();
|
|
}
|
|
|
|
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
|
|
|
|
static int map_update_elem(union bpf_attr *attr)
|
|
{
|
|
void __user *ukey = u64_to_user_ptr(attr->key);
|
|
void __user *uvalue = u64_to_user_ptr(attr->value);
|
|
int ufd = attr->map_fd;
|
|
struct bpf_map *map;
|
|
void *key, *value;
|
|
u8 key_onstack[SZ_16] __aligned(sizeof(long));
|
|
u8 value_onstack[SZ_64] __aligned(sizeof(long));
|
|
u32 value_size;
|
|
struct fd f;
|
|
int err;
|
|
|
|
if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
|
|
return -EINVAL;
|
|
|
|
f = fdget(ufd);
|
|
map = __bpf_map_get(f);
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
|
|
err = -EPERM;
|
|
goto err_put;
|
|
}
|
|
|
|
if (map->key_size <= sizeof(key_onstack)) {
|
|
key = key_onstack;
|
|
if (copy_from_user(key, ukey, map->key_size)) {
|
|
err = -EFAULT;
|
|
goto err_put;
|
|
}
|
|
} else {
|
|
key = memdup_user(ukey, map->key_size);
|
|
if (IS_ERR(key)) {
|
|
err = PTR_ERR(key);
|
|
goto err_put;
|
|
}
|
|
}
|
|
|
|
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
|
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
|
|
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
|
|
value_size = round_up(map->value_size, 8) * num_possible_cpus();
|
|
else
|
|
value_size = map->value_size;
|
|
|
|
if (value_size <= sizeof(value_onstack)) {
|
|
value = value_onstack;
|
|
} else {
|
|
err = -ENOMEM;
|
|
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
|
|
if (!value)
|
|
goto free_key;
|
|
}
|
|
|
|
err = -EFAULT;
|
|
if (copy_from_user(value, uvalue, value_size) != 0)
|
|
goto free_value;
|
|
|
|
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
|
|
* inside bpf map update or delete otherwise deadlocks are possible
|
|
*/
|
|
preempt_disable();
|
|
__this_cpu_inc(bpf_prog_active);
|
|
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
|
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
|
|
err = bpf_percpu_hash_update(map, key, value, attr->flags);
|
|
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
|
|
err = bpf_percpu_array_update(map, key, value, attr->flags);
|
|
} else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
|
|
map->map_type == BPF_MAP_TYPE_PROG_ARRAY ||
|
|
map->map_type == BPF_MAP_TYPE_CGROUP_ARRAY ||
|
|
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
|
|
rcu_read_lock();
|
|
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
|
|
attr->flags);
|
|
rcu_read_unlock();
|
|
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
|
|
rcu_read_lock();
|
|
err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
|
|
attr->flags);
|
|
rcu_read_unlock();
|
|
} else {
|
|
rcu_read_lock();
|
|
err = map->ops->map_update_elem(map, key, value, attr->flags);
|
|
rcu_read_unlock();
|
|
}
|
|
__this_cpu_dec(bpf_prog_active);
|
|
preempt_enable();
|
|
maybe_wait_bpf_programs(map);
|
|
|
|
if (!err)
|
|
trace_bpf_map_update_elem(map, ufd, key, value);
|
|
free_value:
|
|
if (value != value_onstack)
|
|
kfree(value);
|
|
free_key:
|
|
if (key != key_onstack)
|
|
kfree(key);
|
|
err_put:
|
|
fdput(f);
|
|
return err;
|
|
}
|
|
|
|
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
|
|
|
|
static int map_delete_elem(union bpf_attr *attr)
|
|
{
|
|
void __user *ukey = u64_to_user_ptr(attr->key);
|
|
int ufd = attr->map_fd;
|
|
struct bpf_map *map;
|
|
struct fd f;
|
|
void *key;
|
|
u8 key_onstack[SZ_16] __aligned(sizeof(long));
|
|
int err;
|
|
|
|
if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
|
|
return -EINVAL;
|
|
|
|
f = fdget(ufd);
|
|
map = __bpf_map_get(f);
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
|
|
err = -EPERM;
|
|
goto err_put;
|
|
}
|
|
|
|
if (map->key_size <= sizeof(key_onstack)) {
|
|
key = key_onstack;
|
|
if (copy_from_user(key, ukey, map->key_size)) {
|
|
err = -EFAULT;
|
|
goto err_put;
|
|
}
|
|
} else {
|
|
key = memdup_user(ukey, map->key_size);
|
|
if (IS_ERR(key)) {
|
|
err = PTR_ERR(key);
|
|
goto err_put;
|
|
}
|
|
}
|
|
|
|
preempt_disable();
|
|
__this_cpu_inc(bpf_prog_active);
|
|
rcu_read_lock();
|
|
err = map->ops->map_delete_elem(map, key);
|
|
rcu_read_unlock();
|
|
__this_cpu_dec(bpf_prog_active);
|
|
preempt_enable();
|
|
maybe_wait_bpf_programs(map);
|
|
|
|
if (!err)
|
|
trace_bpf_map_delete_elem(map, ufd, key);
|
|
if (key != key_onstack)
|
|
kfree(key);
|
|
err_put:
|
|
fdput(f);
|
|
return err;
|
|
}
|
|
|
|
/* last field in 'union bpf_attr' used by this command */
|
|
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
|
|
|
|
static int map_get_next_key(union bpf_attr *attr)
|
|
{
|
|
void __user *ukey = u64_to_user_ptr(attr->key);
|
|
void __user *unext_key = u64_to_user_ptr(attr->next_key);
|
|
int ufd = attr->map_fd;
|
|
struct bpf_map *map;
|
|
void *key, *next_key;
|
|
u8 key_onstack[SZ_16] __aligned(sizeof(long));
|
|
u8 next_key_onstack[SZ_64] __aligned(sizeof(long));
|
|
struct fd f;
|
|
int err;
|
|
|
|
if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
|
|
return -EINVAL;
|
|
|
|
f = fdget(ufd);
|
|
map = __bpf_map_get(f);
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
if (!(f.file->f_mode & FMODE_CAN_READ)) {
|
|
err = -EPERM;
|
|
goto err_put;
|
|
}
|
|
|
|
if (ukey) {
|
|
if (map->key_size <= sizeof(key_onstack)) {
|
|
key = key_onstack;
|
|
if (copy_from_user(key, ukey, map->key_size)) {
|
|
err = -EFAULT;
|
|
goto err_put;
|
|
}
|
|
} else {
|
|
key = memdup_user(ukey, map->key_size);
|
|
if (IS_ERR(key)) {
|
|
err = PTR_ERR(key);
|
|
goto err_put;
|
|
}
|
|
}
|
|
} else {
|
|
key = NULL;
|
|
}
|
|
|
|
err = -ENOMEM;
|
|
if (map->key_size <= sizeof(next_key_onstack)) {
|
|
next_key = next_key_onstack;
|
|
} else {
|
|
next_key = kmalloc(map->key_size, GFP_USER);
|
|
if (!next_key)
|
|
goto free_key;
|
|
}
|
|
|
|
rcu_read_lock();
|
|
err = map->ops->map_get_next_key(map, key, next_key);
|
|
rcu_read_unlock();
|
|
if (err)
|
|
goto free_next_key;
|
|
|
|
err = -EFAULT;
|
|
if (copy_to_user(unext_key, next_key, map->key_size) != 0)
|
|
goto free_next_key;
|
|
|
|
trace_bpf_map_next_key(map, ufd, key, next_key);
|
|
err = 0;
|
|
|
|
free_next_key:
|
|
if (next_key != next_key_onstack)
|
|
kfree(next_key);
|
|
free_key:
|
|
if (key != key_onstack)
|
|
kfree(key);
|
|
err_put:
|
|
fdput(f);
|
|
return err;
|
|
}
|
|
|
|
/*
 * Table of verifier ops indexed by program type id. It is filled in by
 * expanding every BPF_PROG_TYPE() entry of <linux/bpf_types.h>;
 * BPF_MAP_TYPE() entries expand to nothing here.
 */
static const struct bpf_verifier_ops * const bpf_prog_types[] = {
#define BPF_PROG_TYPE(_id, _ops) [_id] = &_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
};
|
|
|
|
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
|
|
{
|
|
if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
|
|
return -EINVAL;
|
|
|
|
prog->aux->ops = bpf_prog_types[type];
|
|
prog->type = type;
|
|
return 0;
|
|
}
|
|
|
|
/* drop refcnt on maps used by eBPF program and free auxilary data */
|
|
static void free_used_maps(struct bpf_prog_aux *aux)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < aux->used_map_cnt; i++)
|
|
bpf_map_put(aux->used_maps[i]);
|
|
|
|
kfree(aux->used_maps);
|
|
}
|
|
|
|
/*
 * __bpf_prog_charge - account @pages against a user's locked_vm counter.
 * @user: user to charge; NULL means nothing is charged.
 * @pages: number of pages to add.
 *
 * The charge is rolled back if it pushes locked_vm above the current
 * RLIMIT_MEMLOCK limit (converted to pages).
 *
 * Return: 0 on success or when @user is NULL, -EPERM if over the limit.
 */
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
	unsigned long limit_pages = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	unsigned long charged;

	if (!user)
		return 0;

	charged = atomic_long_add_return(pages, &user->locked_vm);
	if (charged <= limit_pages)
		return 0;

	/* Over the limit: undo the charge that was just applied. */
	atomic_long_sub(pages, &user->locked_vm);
	return -EPERM;
}
|
|
|
|
/*
 * __bpf_prog_uncharge - subtract @pages from a user's locked_vm counter.
 * @user: user to uncharge; NULL makes this a no-op.
 * @pages: number of pages to subtract.
 */
void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
	if (!user)
		return;

	atomic_long_sub(pages, &user->locked_vm);
}
|
|
|
|
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
|
|
{
|
|
struct user_struct *user = get_current_user();
|
|
int ret;
|
|
|
|
ret = __bpf_prog_charge(user, prog->pages);
|
|
if (ret) {
|
|
free_uid(user);
|
|
return ret;
|
|
}
|
|
|
|
prog->aux->user = user;
|
|
return 0;
|
|
}
|
|
|
|
static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
|
|
{
|
|
struct user_struct *user = prog->aux->user;
|
|
|
|
__bpf_prog_uncharge(user, prog->pages);
|
|
free_uid(user);
|
|
}
|
|
|
|
static int bpf_prog_alloc_id(struct bpf_prog *prog)
|
|
{
|
|
int id;
|
|
|
|
spin_lock_bh(&prog_idr_lock);
|
|
id = idr_alloc_cyclic(&prog_idr, prog, 1, INT_MAX, GFP_ATOMIC);
|
|
if (id > 0)
|
|
prog->aux->id = id;
|
|
spin_unlock_bh(&prog_idr_lock);
|
|
|
|
/* id is in [1, INT_MAX) */
|
|
if (WARN_ON_ONCE(!id))
|
|
return -ENOSPC;
|
|
|
|
return id > 0 ? 0 : id;
|
|
}
|
|
|
|
/*
 * bpf_prog_free_id - remove @prog's id from prog_idr.
 * @prog: program to unregister.
 * @do_idr_lock: take prog_idr_lock here when true; when false only the
 *               __acquire()/__release() annotations are emitted and no
 *               lock is taken by this function.
 */
static void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock)
{
	/* cBPF to eBPF migrations are currently not in the idr store. */
	if (!prog->aux->id)
		return;

	if (!do_idr_lock) {
		__acquire(&prog_idr_lock);
		idr_remove(&prog_idr, prog->aux->id);
		__release(&prog_idr_lock);
		return;
	}

	spin_lock_bh(&prog_idr_lock);
	idr_remove(&prog_idr, prog->aux->id);
	spin_unlock_bh(&prog_idr_lock);
}
|
|
|
|
static void __bpf_prog_put_rcu(struct rcu_head *rcu)
|
|
{
|
|
struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);
|
|
|
|
free_used_maps(aux);
|
|
bpf_prog_uncharge_memlock(aux->prog);
|
|
security_bpf_prog_free(aux);
|
|
bpf_prog_free(aux->prog);
|
|
}
|
|
|
|
/*
 * __bpf_prog_put - drop one reference on @prog.
 * @prog: program to release.
 * @do_idr_lock: forwarded to bpf_prog_free_id().
 *
 * When the last reference goes away the program is removed from the idr
 * and from kallsyms, and the remaining teardown is deferred to
 * __bpf_prog_put_rcu() via call_rcu().
 */
static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
	if (!atomic_dec_and_test(&prog->aux->refcnt))
		return;

	trace_bpf_prog_put_rcu(prog);
	/* bpf_prog_free_id() must be called first */
	bpf_prog_free_id(prog, do_idr_lock);
	bpf_prog_kallsyms_del(prog);
	call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
}
|
|
|
|
void bpf_prog_put(struct bpf_prog *prog)
|
|
{
|
|
__bpf_prog_put(prog, true);
|
|
}
|
|
EXPORT_SYMBOL_GPL(bpf_prog_put);
|
|
|
|
static int bpf_prog_release(struct inode *inode, struct file *filp)
|
|
{
|
|
struct bpf_prog *prog = filp->private_data;
|
|
|
|
bpf_prog_put(prog);
|
|
return 0;
|
|
}
|
|
|
|
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler of bpf_prog_fops: prints the program type, the
 * jited flag, the hex-encoded tag and the memlock size in bytes
 * (prog->pages shifted by PAGE_SHIFT) to the seq_file.
 */
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_prog *prog = filp->private_data;
	/* Two hex digits per tag byte plus the terminating NUL. */
	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
	unsigned long long memlock_bytes;

	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
	memlock_bytes = (prog->pages * 1ULL) << PAGE_SHIFT;

	seq_printf(m,
		   "prog_type:\t%u\n"
		   "prog_jited:\t%u\n"
		   "prog_tag:\t%s\n"
		   "memlock:\t%llu\n",
		   prog->type, prog->jited, prog_tag, memlock_bytes);
}
#endif
|
|
|
|
const struct file_operations bpf_prog_fops = {
|
|
#ifdef CONFIG_PROC_FS
|
|
.show_fdinfo = bpf_prog_show_fdinfo,
|
|
#endif
|
|
.release = bpf_prog_release,
|
|
.read = bpf_dummy_read,
|
|
.write = bpf_dummy_write,
|
|
};
|
|
|
|
int bpf_prog_new_fd(struct bpf_prog *prog)
|
|
{
|
|
int ret;
|
|
|
|
ret = security_bpf_prog(prog);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
|
|
O_RDWR | O_CLOEXEC);
|
|
}
|
|
|
|
static struct bpf_prog *____bpf_prog_get(struct fd f)
|
|
{
|
|
if (!f.file)
|
|
return ERR_PTR(-EBADF);
|
|
if (f.file->f_op != &bpf_prog_fops) {
|
|
fdput(f);
|
|
return ERR_PTR(-EINVAL);
|
|
}
|
|
|
|
return f.file->private_data;
|
|
}
|
|
|
|
struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i)
|
|
{
|
|
if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
|
|
atomic_sub(i, &prog->aux->refcnt);
|
|
return ERR_PTR(-EBUSY);
|
|
}
|
|
return prog;
|
|
}
|
|
EXPORT_SYMBOL_GPL(bpf_prog_add);
|
|
|
|
void bpf_prog_sub(struct bpf_prog *prog, int i)
|
|
{
|
|
/* Only to be used for undoing previous bpf_prog_add() in some
|
|
* error path. We still know that another entity in our call
|
|
* path holds a reference to the program, thus atomic_sub() can
|
|
* be safely used in such cases!
|
|
*/
|
|
WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0);
|
|
}
|
|
EXPORT_SYMBOL_GPL(bpf_prog_sub);
|
|
|
|
/*
 * bpf_prog_inc - take a single reference on @prog.
 *
 * Return: whatever bpf_prog_add(@prog, 1) returns.
 */
struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog)
{
	const int one_ref = 1;

	return bpf_prog_add(prog, one_ref);
}
EXPORT_SYMBOL_GPL(bpf_prog_inc);
|
|
|
|
/* prog_idr_lock should have been held */
|
|
struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog *prog)
|
|
{
|
|
int refold;
|
|
|
|
refold = __atomic_add_unless(&prog->aux->refcnt, 1, 0);
|
|
|
|
if (refold >= BPF_MAX_REFCNT) {
|
|
__bpf_prog_put(prog, false);
|
|
return ERR_PTR(-EBUSY);
|
|
}
|
|
|
|
if (!refold)
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
return prog;
|
|
}
|
|
EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
|
|
|
|
/*
 * __bpf_prog_get - look up a program by fd and take a reference on it.
 * @ufd: user-space file descriptor.
 * @type: if non-NULL, the program type the fd must refer to.
 *
 * Return: the referenced program, ERR_PTR(-EINVAL) on a type mismatch,
 * or the error pointer produced by ____bpf_prog_get() / bpf_prog_inc().
 */
static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog = ____bpf_prog_get(f);

	if (IS_ERR(prog))
		return prog;

	if (type && prog->type != *type)
		prog = ERR_PTR(-EINVAL);
	else
		prog = bpf_prog_inc(prog);

	fdput(f);
	return prog;
}
|
|
|
|
/*
 * bpf_prog_get - look up a program of any type by fd and reference it.
 * @ufd: user-space file descriptor.
 *
 * Return: the referenced program or an error pointer.
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	enum bpf_prog_type *any_type = NULL;

	return __bpf_prog_get(ufd, any_type);
}
|
|
|
|
/*
 * bpf_prog_get_type - look up a program of a specific type by fd.
 * @ufd: user-space file descriptor.
 * @type: required program type.
 *
 * A trace event is emitted for every successful lookup.
 *
 * Return: the referenced program or an error pointer.
 */
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
{
	struct bpf_prog *prog = __bpf_prog_get(ufd, &type);

	if (IS_ERR(prog))
		return prog;

	trace_bpf_prog_get_type(prog);
	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
|
|
|
|
/* last field in 'union bpf_attr' used by this command */
|
|
#define BPF_PROG_LOAD_LAST_FIELD prog_name
|
|
|
|
static int bpf_prog_load(union bpf_attr *attr)
|
|
{
|
|
enum bpf_prog_type type = attr->prog_type;
|
|
struct bpf_prog *prog;
|
|
int err;
|
|
char license[128];
|
|
bool is_gpl;
|
|
|
|
if (CHECK_ATTR(BPF_PROG_LOAD))
|
|
return -EINVAL;
|
|
|
|
if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT)
|
|
return -EINVAL;
|
|
|
|
/* copy eBPF program license from user space */
|
|
if (strncpy_from_user(license, u64_to_user_ptr(attr->license),
|
|
sizeof(license) - 1) < 0)
|
|
return -EFAULT;
|
|
license[sizeof(license) - 1] = 0;
|
|
|
|
/* eBPF programs must be GPL compatible to use GPL-ed functions */
|
|
is_gpl = license_is_gpl_compatible(license);
|
|
|
|
if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
|
|
return -E2BIG;
|
|
|
|
if (type == BPF_PROG_TYPE_KPROBE &&
|
|
attr->kern_version != LINUX_VERSION_CODE)
|
|
return -EINVAL;
|
|
|
|
if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
|
|
type != BPF_PROG_TYPE_CGROUP_SKB &&
|
|
!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
/* plain bpf_prog allocation */
|
|
prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
|
|
if (!prog)
|
|
return -ENOMEM;
|
|
|
|
err = security_bpf_prog_alloc(prog->aux);
|
|
if (err)
|
|
goto free_prog_nouncharge;
|
|
|
|
err = bpf_prog_charge_memlock(prog);
|
|
if (err)
|
|
goto free_prog_sec;
|
|
|
|
prog->len = attr->insn_cnt;
|
|
|
|
err = -EFAULT;
|
|
if (copy_from_user(prog->insns, u64_to_user_ptr(attr->insns),
|
|
bpf_prog_insn_size(prog)) != 0)
|
|
goto free_prog;
|
|
|
|
prog->orig_prog = NULL;
|
|
prog->jited = 0;
|
|
|
|
atomic_set(&prog->aux->refcnt, 1);
|
|
prog->gpl_compatible = is_gpl ? 1 : 0;
|
|
|
|
/* find program type: socket_filter vs tracing_filter */
|
|
err = find_prog_type(type, prog);
|
|
if (err < 0)
|
|
goto free_prog;
|
|
|
|
prog->aux->load_time = ktime_get_boot_ns();
|
|
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
|
|
if (err)
|
|
goto free_prog;
|
|
|
|
/* run eBPF verifier */
|
|
err = bpf_check(&prog, attr);
|
|
if (err < 0)
|
|
goto free_used_maps;
|
|
|
|
/* eBPF program is ready to be JITed */
|
|
prog = bpf_prog_select_runtime(prog, &err);
|
|
if (err < 0)
|
|
goto free_used_maps;
|
|
|
|
err = bpf_prog_alloc_id(prog);
|
|
if (err)
|
|
goto free_used_maps;
|
|
|
|
/* Upon success of bpf_prog_alloc_id(), the BPF prog is
|
|
* effectively publicly exposed. However, retrieving via
|
|
* bpf_prog_get_fd_by_id() will take another reference,
|
|
* therefore it cannot be gone underneath us.
|
|
*
|
|
* Only for the time /after/ successful bpf_prog_new_fd()
|
|
* and before returning to userspace, we might just hold
|
|
* one reference and any parallel close on that fd could
|
|
* rip everything out. Hence, below notifications must
|
|
* happen before bpf_prog_new_fd().
|
|
*
|
|
* Also, any failure handling from this point onwards must
|
|
* be using bpf_prog_put() given the program is exposed.
|
|
*/
|
|
bpf_prog_kallsyms_add(prog);
|
|
trace_bpf_prog_load(prog, err);
|
|
|
|
err = bpf_prog_new_fd(prog);
|
|
if (err < 0)
|
|
bpf_prog_put(prog);
|
|
return err;
|
|
|
|
free_used_maps:
|
|
free_used_maps(prog->aux);
|
|
free_prog:
|
|
bpf_prog_uncharge_memlock(prog);
|
|
free_prog_sec:
|
|
security_bpf_prog_free(prog->aux);
|
|
free_prog_nouncharge:
|
|
bpf_prog_free(prog);
|
|
return err;
|
|
}
|
|
|
|
#define BPF_OBJ_LAST_FIELD file_flags
|
|
|
|
static int bpf_obj_pin(const union bpf_attr *attr)
|
|
{
|
|
if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
|
|
return -EINVAL;
|
|
|
|
return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
|
|
}
|
|
|
|
static int bpf_obj_get(const union bpf_attr *attr)
|
|
{
|
|
if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
|
|
attr->file_flags & ~BPF_OBJ_FLAG_MASK)
|
|
return -EINVAL;
|
|
|
|
return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
|
|
attr->file_flags);
|
|
}
|
|
|
|
#ifdef CONFIG_CGROUP_BPF
|
|
|
|
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
|
|
|
|
static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
|
|
{
|
|
struct bpf_prog *prog = NULL;
|
|
int ufd = attr->target_fd;
|
|
struct bpf_map *map;
|
|
struct fd f;
|
|
int err;
|
|
|
|
f = fdget(ufd);
|
|
map = __bpf_map_get(f);
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
if (attach) {
|
|
prog = bpf_prog_get_type(attr->attach_bpf_fd,
|
|
BPF_PROG_TYPE_SK_SKB);
|
|
if (IS_ERR(prog)) {
|
|
fdput(f);
|
|
return PTR_ERR(prog);
|
|
}
|
|
}
|
|
|
|
err = sock_map_prog(map, prog, attr->attach_type);
|
|
if (err) {
|
|
fdput(f);
|
|
if (prog)
|
|
bpf_prog_put(prog);
|
|
return err;
|
|
}
|
|
|
|
fdput(f);
|
|
return 0;
|
|
}
|
|
|
|
#define BPF_F_ATTACH_MASK \
|
|
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
|
|
|
|
static int bpf_prog_attach(const union bpf_attr *attr)
|
|
{
|
|
enum bpf_prog_type ptype;
|
|
struct bpf_prog *prog;
|
|
struct cgroup *cgrp;
|
|
int ret;
|
|
|
|
if (!capable(CAP_NET_ADMIN))
|
|
return -EPERM;
|
|
|
|
if (CHECK_ATTR(BPF_PROG_ATTACH))
|
|
return -EINVAL;
|
|
|
|
if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
|
|
return -EINVAL;
|
|
|
|
switch (attr->attach_type) {
|
|
case BPF_CGROUP_INET_INGRESS:
|
|
case BPF_CGROUP_INET_EGRESS:
|
|
ptype = BPF_PROG_TYPE_CGROUP_SKB;
|
|
break;
|
|
case BPF_CGROUP_INET_SOCK_CREATE:
|
|
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
|
|
break;
|
|
case BPF_CGROUP_SOCK_OPS:
|
|
ptype = BPF_PROG_TYPE_SOCK_OPS;
|
|
break;
|
|
case BPF_SK_SKB_STREAM_PARSER:
|
|
case BPF_SK_SKB_STREAM_VERDICT:
|
|
return sockmap_get_from_fd(attr, true);
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
|
|
if (IS_ERR(prog))
|
|
return PTR_ERR(prog);
|
|
|
|
cgrp = cgroup_get_from_fd(attr->target_fd);
|
|
if (IS_ERR(cgrp)) {
|
|
bpf_prog_put(prog);
|
|
return PTR_ERR(cgrp);
|
|
}
|
|
|
|
ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
|
|
attr->attach_flags);
|
|
if (ret)
|
|
bpf_prog_put(prog);
|
|
cgroup_put(cgrp);
|
|
|
|
return ret;
|
|
}
|
|
|
|
#define BPF_PROG_DETACH_LAST_FIELD attach_type
|
|
|
|
static int bpf_prog_detach(const union bpf_attr *attr)
|
|
{
|
|
enum bpf_prog_type ptype;
|
|
struct bpf_prog *prog;
|
|
struct cgroup *cgrp;
|
|
int ret;
|
|
|
|
if (!capable(CAP_NET_ADMIN))
|
|
return -EPERM;
|
|
|
|
if (CHECK_ATTR(BPF_PROG_DETACH))
|
|
return -EINVAL;
|
|
|
|
switch (attr->attach_type) {
|
|
case BPF_CGROUP_INET_INGRESS:
|
|
case BPF_CGROUP_INET_EGRESS:
|
|
ptype = BPF_PROG_TYPE_CGROUP_SKB;
|
|
break;
|
|
case BPF_CGROUP_INET_SOCK_CREATE:
|
|
ptype = BPF_PROG_TYPE_CGROUP_SOCK;
|
|
break;
|
|
case BPF_CGROUP_SOCK_OPS:
|
|
ptype = BPF_PROG_TYPE_SOCK_OPS;
|
|
break;
|
|
case BPF_SK_SKB_STREAM_PARSER:
|
|
case BPF_SK_SKB_STREAM_VERDICT:
|
|
return sockmap_get_from_fd(attr, false);
|
|
default:
|
|
return -EINVAL;
|
|
}
|
|
|
|
cgrp = cgroup_get_from_fd(attr->target_fd);
|
|
if (IS_ERR(cgrp))
|
|
return PTR_ERR(cgrp);
|
|
|
|
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
|
|
if (IS_ERR(prog))
|
|
prog = NULL;
|
|
|
|
ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
|
|
if (prog)
|
|
bpf_prog_put(prog);
|
|
cgroup_put(cgrp);
|
|
return ret;
|
|
}
|
|
|
|
#endif /* CONFIG_CGROUP_BPF */
|
|
|
|
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
|
|
|
|
static int bpf_prog_test_run(const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
struct bpf_prog *prog;
|
|
int ret = -ENOTSUPP;
|
|
|
|
if (CHECK_ATTR(BPF_PROG_TEST_RUN))
|
|
return -EINVAL;
|
|
|
|
prog = bpf_prog_get(attr->test.prog_fd);
|
|
if (IS_ERR(prog))
|
|
return PTR_ERR(prog);
|
|
|
|
if (prog->aux->ops->test_run)
|
|
ret = prog->aux->ops->test_run(prog, attr, uattr);
|
|
|
|
bpf_prog_put(prog);
|
|
return ret;
|
|
}
|
|
|
|
#define BPF_OBJ_GET_NEXT_ID_LAST_FIELD next_id
|
|
|
|
static int bpf_obj_get_next_id(const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr,
|
|
struct idr *idr,
|
|
spinlock_t *lock)
|
|
{
|
|
u32 next_id = attr->start_id;
|
|
int err = 0;
|
|
|
|
if (CHECK_ATTR(BPF_OBJ_GET_NEXT_ID) || next_id >= INT_MAX)
|
|
return -EINVAL;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
next_id++;
|
|
spin_lock_bh(lock);
|
|
if (!idr_get_next(idr, &next_id))
|
|
err = -ENOENT;
|
|
spin_unlock_bh(lock);
|
|
|
|
if (!err)
|
|
err = put_user(next_id, &uattr->next_id);
|
|
|
|
return err;
|
|
}
|
|
|
|
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
|
|
|
|
static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
|
|
{
|
|
struct bpf_prog *prog;
|
|
u32 id = attr->prog_id;
|
|
int fd;
|
|
|
|
if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
|
|
return -EINVAL;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
spin_lock_bh(&prog_idr_lock);
|
|
prog = idr_find(&prog_idr, id);
|
|
if (prog)
|
|
prog = bpf_prog_inc_not_zero(prog);
|
|
else
|
|
prog = ERR_PTR(-ENOENT);
|
|
spin_unlock_bh(&prog_idr_lock);
|
|
|
|
if (IS_ERR(prog))
|
|
return PTR_ERR(prog);
|
|
|
|
fd = bpf_prog_new_fd(prog);
|
|
if (fd < 0)
|
|
bpf_prog_put(prog);
|
|
|
|
return fd;
|
|
}
|
|
|
|
#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
|
|
|
|
static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
|
|
{
|
|
struct bpf_map *map;
|
|
u32 id = attr->map_id;
|
|
int f_flags;
|
|
int fd;
|
|
|
|
if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
|
|
attr->open_flags & ~BPF_OBJ_FLAG_MASK)
|
|
return -EINVAL;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
return -EPERM;
|
|
|
|
f_flags = bpf_get_file_flag(attr->open_flags);
|
|
if (f_flags < 0)
|
|
return f_flags;
|
|
|
|
spin_lock_bh(&map_idr_lock);
|
|
map = idr_find(&map_idr, id);
|
|
if (map)
|
|
map = bpf_map_inc_not_zero(map, true);
|
|
else
|
|
map = ERR_PTR(-ENOENT);
|
|
spin_unlock_bh(&map_idr_lock);
|
|
|
|
if (IS_ERR(map))
|
|
return PTR_ERR(map);
|
|
|
|
fd = bpf_map_new_fd(map, f_flags);
|
|
if (fd < 0)
|
|
bpf_map_put_with_uref(map);
|
|
|
|
return fd;
|
|
}
|
|
|
|
static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
|
|
const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
|
|
struct bpf_prog_info info;
|
|
u32 info_len = attr->info.info_len;
|
|
char __user *uinsns;
|
|
u32 ulen;
|
|
int err;
|
|
|
|
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
|
|
if (err)
|
|
return err;
|
|
info_len = min_t(u32, sizeof(info), info_len);
|
|
|
|
memset(&info, 0, sizeof(info));
|
|
if (copy_from_user(&info, uinfo, info_len))
|
|
return -EFAULT;
|
|
|
|
info.type = prog->type;
|
|
info.id = prog->aux->id;
|
|
info.load_time = prog->aux->load_time;
|
|
info.created_by_uid = from_kuid_munged(current_user_ns(),
|
|
prog->aux->user->uid);
|
|
|
|
memcpy(info.tag, prog->tag, sizeof(prog->tag));
|
|
memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
|
|
|
|
ulen = info.nr_map_ids;
|
|
info.nr_map_ids = prog->aux->used_map_cnt;
|
|
ulen = min_t(u32, info.nr_map_ids, ulen);
|
|
if (ulen) {
|
|
u32 *user_map_ids = (u32 *)info.map_ids;
|
|
u32 i;
|
|
|
|
for (i = 0; i < ulen; i++)
|
|
if (put_user(prog->aux->used_maps[i]->id,
|
|
&user_map_ids[i]))
|
|
return -EFAULT;
|
|
}
|
|
|
|
if (!capable(CAP_SYS_ADMIN)) {
|
|
info.jited_prog_len = 0;
|
|
info.xlated_prog_len = 0;
|
|
goto done;
|
|
}
|
|
|
|
ulen = info.jited_prog_len;
|
|
info.jited_prog_len = prog->jited_len;
|
|
if (info.jited_prog_len && ulen) {
|
|
uinsns = u64_to_user_ptr(info.jited_prog_insns);
|
|
ulen = min_t(u32, info.jited_prog_len, ulen);
|
|
if (copy_to_user(uinsns, prog->bpf_func, ulen))
|
|
return -EFAULT;
|
|
}
|
|
|
|
ulen = info.xlated_prog_len;
|
|
info.xlated_prog_len = bpf_prog_insn_size(prog);
|
|
if (info.xlated_prog_len && ulen) {
|
|
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
|
|
ulen = min_t(u32, info.xlated_prog_len, ulen);
|
|
if (copy_to_user(uinsns, prog->insnsi, ulen))
|
|
return -EFAULT;
|
|
}
|
|
|
|
done:
|
|
if (copy_to_user(uinfo, &info, info_len) ||
|
|
put_user(info_len, &uattr->info.info_len))
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int bpf_map_get_info_by_fd(struct bpf_map *map,
|
|
const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
struct bpf_map_info __user *uinfo = u64_to_user_ptr(attr->info.info);
|
|
struct bpf_map_info info;
|
|
u32 info_len = attr->info.info_len;
|
|
int err;
|
|
|
|
err = check_uarg_tail_zero(uinfo, sizeof(info), info_len);
|
|
if (err)
|
|
return err;
|
|
info_len = min_t(u32, sizeof(info), info_len);
|
|
|
|
memset(&info, 0, sizeof(info));
|
|
info.type = map->map_type;
|
|
info.id = map->id;
|
|
info.key_size = map->key_size;
|
|
info.value_size = map->value_size;
|
|
info.max_entries = map->max_entries;
|
|
info.map_flags = map->map_flags;
|
|
memcpy(info.name, map->name, sizeof(map->name));
|
|
|
|
if (copy_to_user(uinfo, &info, info_len) ||
|
|
put_user(info_len, &uattr->info.info_len))
|
|
return -EFAULT;
|
|
|
|
return 0;
|
|
}
|
|
|
|
#define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
|
|
|
|
static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
|
|
union bpf_attr __user *uattr)
|
|
{
|
|
int ufd = attr->info.bpf_fd;
|
|
struct fd f;
|
|
int err;
|
|
|
|
if (CHECK_ATTR(BPF_OBJ_GET_INFO_BY_FD))
|
|
return -EINVAL;
|
|
|
|
f = fdget(ufd);
|
|
if (!f.file)
|
|
return -EBADFD;
|
|
|
|
if (f.file->f_op == &bpf_prog_fops)
|
|
err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
|
|
uattr);
|
|
else if (f.file->f_op == &bpf_map_fops)
|
|
err = bpf_map_get_info_by_fd(f.file->private_data, attr,
|
|
uattr);
|
|
else
|
|
err = -EINVAL;
|
|
|
|
fdput(f);
|
|
return err;
|
|
}
|
|
|
|
/*
 * bpf() system call entry point.
 * @cmd: BPF_* command selecting the operation.
 * @uattr: user-space pointer to the command's union bpf_attr.
 * @size: size of the structure @uattr points to, as seen by user space.
 *
 * Copies at most sizeof(union bpf_attr) bytes of attributes in (after
 * check_uarg_tail_zero() has vetted @size), runs the security hook and
 * dispatches to the per-command handler.
 *
 * Return: the handler's result, -EPERM if unprivileged use is disabled
 * and the caller lacks CAP_SYS_ADMIN, -EFAULT if the attributes cannot
 * be copied, -EINVAL for an unknown command, or the error from
 * check_uarg_tail_zero() / security_bpf().
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr;
	int err;

	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	err = check_uarg_tail_zero(uattr, sizeof(attr), size);
	if (err)
		return err;
	size = min_t(u32, size, sizeof(attr));

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	memset(&attr, 0, sizeof(attr));
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	err = security_bpf(cmd, &attr, size);
	if (err < 0)
		return err;

	switch (cmd) {
	case BPF_MAP_CREATE:
		return map_create(&attr);
	case BPF_MAP_LOOKUP_ELEM:
		return map_lookup_elem(&attr);
	case BPF_MAP_UPDATE_ELEM:
		return map_update_elem(&attr);
	case BPF_MAP_DELETE_ELEM:
		return map_delete_elem(&attr);
	case BPF_MAP_GET_NEXT_KEY:
		return map_get_next_key(&attr);
	case BPF_PROG_LOAD:
		return bpf_prog_load(&attr);
	case BPF_OBJ_PIN:
		return bpf_obj_pin(&attr);
	case BPF_OBJ_GET:
		return bpf_obj_get(&attr);
#ifdef CONFIG_CGROUP_BPF
	case BPF_PROG_ATTACH:
		return bpf_prog_attach(&attr);
	case BPF_PROG_DETACH:
		return bpf_prog_detach(&attr);
#endif
	case BPF_PROG_TEST_RUN:
		return bpf_prog_test_run(&attr, uattr);
	case BPF_PROG_GET_NEXT_ID:
		return bpf_obj_get_next_id(&attr, uattr,
					   &prog_idr, &prog_idr_lock);
	case BPF_MAP_GET_NEXT_ID:
		return bpf_obj_get_next_id(&attr, uattr,
					   &map_idr, &map_idr_lock);
	case BPF_PROG_GET_FD_BY_ID:
		return bpf_prog_get_fd_by_id(&attr);
	case BPF_MAP_GET_FD_BY_ID:
		return bpf_map_get_fd_by_id(&attr);
	case BPF_OBJ_GET_INFO_BY_FD:
		return bpf_obj_get_info_by_fd(&attr, uattr);
	default:
		return -EINVAL;
	}
}
|