Files
Michael Bestas ada634c3e0 Merge tag 'v4.14.340-openela' into android-msm-pixel-4.14
This is the 4.14.340 OpenELA-Extended LTS stable release

* tag 'v4.14.340-openela':
  LTS: Update to 4.14.340
  fs/aio: Restrict kiocb_set_cancel_fn() to I/O submitted via libaio
  KVM: arm64: vgic-its: Test for valid IRQ in its_sync_lpi_pending_table()
  PCI/MSI: Prevent MSI hardware interrupt number truncation
  s390: use the correct count for __iowrite64_copy()
  packet: move from strlcpy with unused retval to strscpy
  ipv6: sr: fix possible use-after-free and null-ptr-deref
  nouveau: fix function cast warnings
  scsi: jazz_esp: Only build if SCSI core is builtin
  RDMA/srpt: fix function pointer cast warnings
  RDMA/srpt: Support specifying the srpt_service_guid parameter
  IB/hfi1: Fix a memleak in init_credit_return
  usb: gadget: ncm: Avoid dropping datagrams of properly parsed NTBs
  l2tp: pass correct message length to ip6_append_data
  gtp: fix use-after-free and null-ptr-deref in gtp_genl_dump_pdp()
  dm-crypt: don't modify the data when using authenticated encryption
  mm: memcontrol: switch to rcu protection in drain_all_stock()
  s390/qeth: Fix potential loss of L3-IP@ in case of network issues
  virtio-blk: Ensure no requests in virtqueues before deleting vqs.
  firewire: core: send bus reset promptly on gap count error
  hwmon: (coretemp) Enlarge per package core count limit
  regulator: pwm-regulator: Add validity checks in continuous .get_voltage
  ext4: avoid allocating blocks from corrupted group in ext4_mb_find_by_goal()
  ext4: avoid allocating blocks from corrupted group in ext4_mb_try_best_found()
  ahci: asm1166: correct count of reported ports
  fbdev: sis: Error out if pixclock equals zero
  fbdev: savage: Error out if pixclock equals zero
  wifi: mac80211: fix race condition on enabling fast-xmit
  wifi: cfg80211: fix missing interfaces when dumping
  dmaengine: shdma: increase size of 'dev_id'
  scsi: target: core: Add TMF to tmr_list handling
  sched/rt: Disallow writing invalid values to sched_rt_period_us
  sched/rt: sysctl_sched_rr_timeslice show default timeslice after reset
  sched/rt: Fix sysctl_sched_rr_timeslice intial value
  nilfs2: replace WARN_ONs for invalid DAT metadata block requests
  memcg: add refcnt for pcpu stock to avoid UAF problem in drain_all_stock()
  net/sched: Retire dsmark qdisc
  net/sched: Retire ATM qdisc
  net/sched: Retire CBQ qdisc
  LTS: Update to 4.14.339
  netfilter: nf_tables: fix pointer math issue in nft_byteorder_eval()
  lsm: new security_file_ioctl_compat() hook
  nilfs2: fix potential bug in end_buffer_async_write
  sched/membarrier: reduce the ability to hammer on sys_membarrier
  Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"
  pmdomain: core: Move the unused cleanup to a _sync initcall
  irqchip/irq-brcmstb-l2: Add write memory barrier before exit
  nfp: use correct macro for LengthSelect in BAR config
  nilfs2: fix hang in nilfs_lookup_dirty_data_buffers()
  nilfs2: fix data corruption in dsync block recovery for small block sizes
  ALSA: hda/conexant: Add quirk for SWS JS201D
  x86/mm/ident_map: Use gbpages only where full GB page should be mapped.
  staging: iio: ad5933: fix type mismatch regression
  ext4: fix double-free of blocks due to wrong extents moved_len
  xen-netback: properly sync TX responses
  nfc: nci: free rx_data_reassembly skb on NCI device cleanup
  firewire: core: correct documentation of fw_csr_string() kernel API
  scsi: Revert "scsi: fcoe: Fix potential deadlock on &fip->ctlr_lock"
  usb: f_mass_storage: forbid async queue when shutdown happen
  USB: hub: check for alternate port before enabling A_ALT_HNP_SUPPORT
  HID: wacom: Do not register input devices until after hid_hw_start
  HID: wacom: generic: Avoid reporting a serial of '0' to userspace
  mm/writeback: fix possible divide-by-zero in wb_dirty_limits(), again
  tracing/trigger: Fix to return error if failed to alloc snapshot
  i40e: Fix waiting for queues of all VSIs to be disabled
  MIPS: Add 'memory' clobber to csum_ipv6_magic() inline assembler
  net: sysfs: Fix /sys/class/net/<iface> path for statistics
  Documentation: net-sysfs: describe missing statistics
  ASoC: rt5645: Fix deadlock in rt5645_jack_detect_work()
  spi: ppc4xx: Drop write-only variable
  btrfs: send: return EOPNOTSUPP on unknown flags
  vhost: use kzalloc() instead of kmalloc() followed by memset()
  Input: atkbd - skip ATKBD_CMD_SETLEDS when skipping ATKBD_CMD_GETID
  USB: serial: cp210x: add ID for IMST iM871A-USB
  USB: serial: option: add Fibocom FM101-GL variant
  USB: serial: qcserial: add new usb-id for Dell Wireless DW5826e
  net/af_iucv: clean up a try_then_request_module()
  netfilter: nft_compat: restrict match/target protocol to u16
  netfilter: nft_compat: reject unused compat flag
  ppp_async: limit MRU to 64K
  tipc: Check the bearer type before calling tipc_udp_nl_bearer_add()
  rxrpc: Fix response to PING RESPONSE ACKs to a dead call
  inet: read sk->sk_family once in inet_recv_error()
  hwmon: (aspeed-pwm-tacho) mutex for tach reading
  atm: idt77252: fix a memleak in open_card_ubr0
  phy: ti: phy-omap-usb2: Fix NULL pointer dereference for SRP
  dmaengine: fix is_slave_direction() return false when DMA_DEV_TO_DEV
  bonding: remove print in bond_verify_device_path
  HID: apple: Add 2021 magic keyboard FN key mapping
  HID: apple: Add support for the 2021 Magic Keyboard
  HID: apple: Swap the Fn and Left Control keys on Apple keyboards
  net: sysfs: Fix /sys/class/net/<iface> path
  af_unix: fix lockdep positive in sk_diag_dump_icons()
  net: ipv4: fix a memleak in ip_setup_cork
  net: Fix one possible memleak in ip_setup_cork
  netfilter: nf_log: replace BUG_ON by WARN_ON_ONCE when putting logger
  llc: call sock_orphan() at release time
  ipv6: Ensure natural alignment of const ipv6 loopback and router addresses
  ixgbe: Fix an error handling path in ixgbe_read_iosf_sb_reg_x550()
  ixgbe: Refactor overtemp event handling
  ixgbe: Remove non-inclusive language
  net: remove unneeded break
  scsi: isci: Fix an error code problem in isci_io_request_build()
  wifi: cfg80211: fix RCU dereference in __cfg80211_bss_update
  drm/amdgpu: Release 'adev->pm.fw' before return in 'amdgpu_device_need_post()'
  ceph: fix deadlock or deadcode of misusing dget()
  virtio_net: Fix "‘%d’ directive writing between 1 and 11 bytes into a region of size 10" warnings
  libsubcmd: Fix memory leak in uniq()
  usb: hub: Replace hardcoded quirk value with BIT() macro
  PCI: Only override AMD USB controller if required
  mfd: ti_am335x_tscadc: Fix TI SoC dependencies
  um: net: Fix return type of uml_net_start_xmit()
  um: Don't use vfprintf() for os_info()
  um: Fix naming clash between UML and scheduler
  leds: trigger: panic: Don't register panic notifier if creating the trigger failed
  clk: mmp: pxa168: Fix memory leak in pxa168_clk_init()
  clk: hi3620: Fix memory leak in hi3620_mmc_clk_init()
  media: ddbridge: fix an error code problem in ddb_probe
  IB/ipoib: Fix mcast list locking
  drm/exynos: Call drm_atomic_helper_shutdown() at shutdown/unbind time
  ALSA: hda: Intel: add HDA_ARL PCI ID support
  ALSA: hda: Add Icelake PCI ID
  PCI: add INTEL_HDA_ARL to pci_ids.h
  media: stk1160: Fixed high volume of stk1160_dbg messages
  drm/mipi-dsi: Fix detach call without attach
  drm/framebuffer: Fix use of uninitialized variable
  drm/drm_file: fix use of uninitialized variable
  RDMA/IPoIB: Fix error code return in ipoib_mcast_join
  fast_dput(): handle underflows gracefully
  ASoC: doc: Fix undefined SND_SOC_DAPM_NOPM argument
  wifi: cfg80211: free beacon_ies when overridden from hidden BSS
  wifi: rtlwifi: rtl8723{be,ae}: using calculate_bit_shift()
  wifi: rtl8xxxu: Add additional USB IDs for RTL8192EU devices
  md: Whenassemble the array, consult the superblock of the freshest device
  ARM: dts: imx23/28: Fix the DMA controller node name
  ARM: dts: imx23-sansa: Use preferred i2c-gpios properties
  ARM: dts: imx27-apf27dev: Fix LED name
  ARM: dts: imx1: Fix sram node
  ARM: dts: imx27: Fix sram node
  ARM: dts: imx: Use flash@0,0 pattern
  ARM: dts: imx25/27-eukrea: Fix RTC node name
  ARM: dts: rockchip: fix rk3036 hdmi ports node
  scsi: libfc: Fix up timeout error in fc_fcp_rec_error()
  scsi: libfc: Don't schedule abort twice
  bpf: Add map and need_defer parameters to .map_fd_put_ptr()
  wifi: ath9k: Fix potential array-index-out-of-bounds read in ath9k_htc_txstatus()
  ARM: dts: imx7s: Fix nand-controller #size-cells
  ARM: dts: imx7s: Fix lcdif compatible
  bonding: return -ENOMEM instead of BUG in alb_upper_dev_walk
  PCI: Add no PM reset quirk for NVIDIA Spectrum devices
  scsi: lpfc: Fix possible file string name overflow when updating firmware
  ext4: unify the type of flexbg_size to unsigned int
  SUNRPC: Fix a suspicious RCU usage warning
  KVM: s390: fix setting of fpc register
  s390/ptrace: handle setting of fpc register correctly
  jfs: fix array-index-out-of-bounds in diNewExt
  rxrpc_find_service_conn_rcu: fix the usage of read_seqbegin_or_lock()
  pstore/ram: Fix crash when setting number of cpus to an odd number
  jfs: fix uaf in jfs_evict_inode
  jfs: fix array-index-out-of-bounds in dbAdjTree
  jfs: fix slab-out-of-bounds Read in dtSearch
  UBSAN: array-index-out-of-bounds in dtSplitRoot
  FS:JFS:UBSAN:array-index-out-of-bounds in dbAdjTree
  ACPI: extlog: fix NULL pointer dereference check
  PNP: ACPI: fix fortify warning
  ACPI: video: Add quirk for the Colorful X15 AT 23 Laptop
  audit: Send netlink ACK before setting connection in auditd_set
  powerpc/lib: Validate size for vector operations
  powerpc/mm: Fix build failures due to arch_reserved_kernel_pages()
  powerpc: Fix build error due to is_valid_bugaddr()
  powerpc/mm: Fix null-pointer dereference in pgtable_cache_add
  tick/sched: Preserve number of idle sleeps across CPU hotplug events
  mips: Call lose_fpu(0) before initializing fcr31 in mips_set_personality_nan
  drm/bridge: nxp-ptn3460: simplify some error checking
  drm/bridge: nxp-ptn3460: fix i2c_master_send() error checking
  drm: Don't unref the same fb many times by mistake due to deadlock handling
  gpiolib: acpi: Ignore touchpad wakeup on GPD G1619-04
  netfilter: nf_tables: reject QUEUE/DROP verdict parameters
  btrfs: defrag: reject unknown flags of btrfs_ioctl_defrag_range_args
  btrfs: don't warn if discard range is not aligned to sector
  net: fec: fix the unhandled context fault from smmu
  fjes: fix memleaks in fjes_hw_setup
  netfilter: nf_tables: restrict anonymous set and map names to 16 bytes
  net/mlx5e: fix a double-free in arfs_create_groups
  net/mlx5: Use kfree(ft->g) in arfs_create_groups()
  netlink: fix potential sleeping issue in mqueue_flush_file
  tcp: Add memory barrier to tcp_push()
  net/rds: Fix UBSAN: array-index-out-of-bounds in rds_cmsg_recv
  llc: Drop support for ETH_P_TR_802_2.
  llc: make llc_ui_sendmsg() more robust against bonding changes
  vlan: skip nested type that is not IFLA_VLAN_QOS_MAPPING
  drivers: core: fix kernel-doc markup for dev_err_probe()
  driver code: print symbolic error code
  Revert "driver core: Annotate dev_err_probe() with __must_check"
  driver core: Annotate dev_err_probe() with __must_check
  x86/CPU/AMD: Fix disabling XSAVES on AMD family 0x17 due to erratum
  powerpc: Use always instead of always-y in for crtsavres.o
  block: Remove special-casing of compound pages
  parisc/firmware: Fix F-extend for PDC addresses
  rpmsg: virtio: Free driver_override when rpmsg_remove()
  hwrng: core - Fix page fault dead lock on mmap-ed hwrng
  PM: hibernate: Enforce ordering during image compression/decompression
  crypto: api - Disallow identical driver names
  serial: sc16is7xx: add check for unsupported SPI modes during probe
  spi: introduce SPI_MODE_X_MASK macro
  driver core: add device probe log helper
  serial: sc16is7xx: set safe default SPI clock frequency
  units: add the HZ macros
  units: change from 'L' to 'UL'
  units: Add Watt units
  include/linux/units.h: add helpers for kelvin to/from Celsius conversion
  PCI: mediatek: Clear interrupt status before dispatching handler
  LTS: Update to 4.14.338
  crypto: scompress - initialize per-CPU variables on each CPU
  Revert "NFSD: Fix possible sleep during nfsd4_release_lockowner()"
  i2c: s3c24xx: fix transferring more than one message in polling mode
  i2c: s3c24xx: fix read transfers in polling mode
  kdb: Fix a potential buffer overflow in kdb_local()
  kdb: Censor attempts to set PROMPT without ENABLE_MEM_READ
  ipvs: avoid stat macros calls from preemptible context
  net: ravb: Fix dma_addr_t truncation in error case
  serial: imx: Correct clock error message in function probe()
  apparmor: avoid crash when parsed profile name is empty
  MIPS: Alchemy: Fix an out-of-bound access in db1550_dev_setup()
  MIPS: Alchemy: Fix an out-of-bound access in db1200_dev_setup()
  HID: wacom: Correct behavior when processing some confidence == false touches
  wifi: mwifiex: configure BSSID consistently when starting AP
  wifi: rtlwifi: Convert LNKCTL change to PCIe cap RMW accessors
  wifi: rtlwifi: Remove bogus and dangerous ASPM disable/enable code
  fbdev: flush deferred work in fb_deferred_io_fsync()
  ALSA: oxygen: Fix right channel of capture volume mixer
  usb: mon: Fix atomicity violation in mon_bin_vma_fault
  usb: chipidea: wait controller resume finished for wakeup irq
  usb: dwc: ep0: Update request status in dwc3_ep0_stall_restart
  usb: phy: mxs: remove CONFIG_USB_OTG condition for mxs_phy_is_otg_host()
  tick-sched: Fix idle and iowait sleeptime accounting vs CPU hotplug
  binder: fix unused alloc->free_async_space
  binder: fix race between mmput() and do_exit()
  xen-netback: don't produce zero-size SKB frags
  Input: atkbd - use ab83 as id when skipping the getid command
  binder: fix async space check for 0-sized buffers
  watchdog: bcm2835_wdt: Fix WDIOC_SETTIMEOUT handling
  watchdog: set cdev owner before adding
  gpu/drm/radeon: fix two memleaks in radeon_vm_init
  drivers/amd/pm: fix a use-after-free in kv_parse_power_table
  drm/amd/pm: fix a double-free in si_dpm_init
  media: dvbdev: drop refcount on error path in dvb_device_open()
  media: cx231xx: fix a memleak in cx231xx_init_isoc
  drm/radeon/trinity_dpm: fix a memleak in trinity_parse_power_table
  drm/radeon/dpm: fix a memleak in sumo_parse_power_table
  drm/radeon: check the alloc_workqueue return value in radeon_crtc_init()
  drm/drv: propagate errors from drm_modeset_register_all()
  drm/msm/mdp4: flush vblank event on disable
  ASoC: cs35l34: Fix GPIO name and drop legacy include
  ASoC: cs35l33: Fix GPIO name and drop legacy include
  drm/radeon: check return value of radeon_ring_lock()
  drm/radeon/r100: Fix integer overflow issues in r100_cs_track_check()
  drm/radeon/r600_cs: Fix possible int overflows in r600_cs_check_reg()
  f2fs: fix to avoid dirent corruption
  drm/bridge: Fix typo in post_disable() description
  media: pvrusb2: fix use after free on context disconnection
  RDMA/usnic: Silence uninitialized symbol smatch warnings
  ip6_tunnel: fix NEXTHDR_FRAGMENT handling in ip6_tnl_parse_tlv_enc_lim()
  Bluetooth: Fix bogus check for re-auth no supported with non-ssp
  wifi: rtlwifi: rtl8192se: using calculate_bit_shift()
  wifi: rtlwifi: rtl8192ee: using calculate_bit_shift()
  wifi: rtlwifi: rtl8192de: using calculate_bit_shift()
  rtlwifi: rtl8192de: make arrays static const, makes object smaller
  wifi: rtlwifi: rtl8192ce: using calculate_bit_shift()
  wifi: rtlwifi: rtl8192cu: using calculate_bit_shift()
  wifi: rtlwifi: rtl8192c: using calculate_bit_shift()
  wifi: rtlwifi: rtl8188ee: phy: using calculate_bit_shift()
  wifi: rtlwifi: add calculate_bit_shift()
  wifi: rtlwifi: rtl8821ae: phy: fix an undefined bitwise shift behavior
  rtlwifi: Use ffs in <foo>_phy_calculate_bit_shift
  firmware: ti_sci: Fix an off-by-one in ti_sci_debugfs_create()
  net/ncsi: Fix netlink major/minor version numbers
  ncsi: internal.h: Fix a spello
  wifi: libertas: stop selecting wext
  bpf, lpm: Fix check prefixlen before walking trie
  NFSv4.1/pnfs: Ensure we handle the error NFS4ERR_RETURNCONFLICT
  crypto: scomp - fix req->dst buffer overflow
  crypto: scompress - Use per-CPU struct instead multiple variables
  crypto: scompress - return proper error code for allocation failure
  crypto: sahara - do not resize req->src when doing hash operations
  crypto: sahara - fix processing hash requests with req->nbytes < sg->length
  crypto: sahara - improve error handling in sahara_sha_process()
  crypto: sahara - fix wait_for_completion_timeout() error handling
  crypto: sahara - fix ahash reqsize
  crypto: virtio - Wait for tasklet to complete on device remove
  pstore: ram_core: fix possible overflow in persistent_ram_init_ecc()
  crypto: sahara - fix error handling in sahara_hw_descriptor_create()
  crypto: sahara - fix processing requests with cryptlen < sg->length
  crypto: sahara - fix ahash selftest failure
  crypto: sahara - remove FLAGS_NEW_KEY logic
  crypto: af_alg - Disallow multiple in-flight AIO requests
  crypto: ccp - fix memleak in ccp_init_dm_workarea
  crypto: virtio - Handle dataq logic with tasklet
  mtd: Fix gluebi NULL pointer dereference caused by ftl notifier
  calipso: fix memory leak in netlbl_calipso_add_pass()
  netlabel: remove unused parameter in netlbl_netlink_auditinfo()
  net: netlabel: Fix kerneldoc warnings
  ACPI: video: check for error while searching for backlight device parent
  mtd: rawnand: Increment IFC_TIMEOUT_MSECS for nand controller response
  powerpc/imc-pmu: Add a null pointer check in update_events_in_group()
  powerpc/powernv: Add a null pointer check in opal_event_init()
  selftests/powerpc: Fix error handling in FPU/VMX preemption tests
  powerpc/pseries/memhp: Fix access beyond end of drmem array
  powerpc/pseries/memhotplug: Quieten some DLPAR operations
  powerpc/44x: select I2C for CURRITUCK
  powerpc: remove redundant 'default n' from Kconfig-s
  powerpc: add crtsavres.o to always-y instead of extra-y
  EDAC/thunderx: Fix possible out-of-bounds string access
  x86/lib: Fix overflow when counting digits
  coresight: etm4x: Fix width of CCITMIN field
  uio: Fix use-after-free in uio_open
  binder: fix comment on binder_alloc_new_buf() return value
  drm/crtc: fix uninitialized variable use
  Input: xpad - add Razer Wolverine V2 support
  ARC: fix spare error
  s390/scm: fix virtual vs physical address confusion
  Input: atkbd - skip ATKBD_CMD_GETID in translated mode
  reset: hisilicon: hi6220: fix Wvoid-pointer-to-enum-cast warning
  ring-buffer: Do not record in NMI if the arch does not support cmpxchg in NMI
  tracing: Add size check when printing trace_marker output
  tracing: Have large events show up as '[LINE TOO BIG]' instead of nothing
  drm/crtc: Fix uninit-value bug in drm_mode_setcrtc
  jbd2: correct the printing of write_flags in jbd2_write_superblock()
  clk: rockchip: rk3128: Fix HCLK_OTG gate register
  drm/exynos: fix a potential error pointer dereference
  ASoC: da7219: Support low DC impedance headset
  net/tg3: fix race condition in tg3_reset_task()
  ASoC: rt5650: add mutex to avoid the jack detection failure
  ASoC: cs43130: Fix incorrect frame delay configuration
  ASoC: cs43130: Fix the position of const qualifier
  f2fs: explicitly null-terminate the xattr list
  LTS: Update to 4.14.337
  ipv6: remove max_size check inline with ipv4
  ipv6: make ip6_rt_gc_expire an atomic_t
  net/dst: use a smaller percpu_counter batch for dst entries accounting
  net: add a route cache full diagnostic message
  netfilter: nf_tables: Reject tables of unsupported family
  fuse: nlookup missing decrement in fuse_direntplus_link
  mm: fix unmap_mapping_range high bits shift bug
  mm/memory-failure: check the mapcount of the precise page
  bnxt_en: Remove mis-applied code from bnxt_cfg_ntp_filters()
  asix: Add check for usbnet_get_endpoints
  net/qla3xxx: fix potential memleak in ql_alloc_buffer_queues
  net/qla3xxx: switch from 'pci_' to 'dma_' API
  LTS: create metadata for 4.14.y

 Conflicts:
	drivers/android/binder_alloc.c
	drivers/infiniband/ulp/srpt/ib_srpt.c
	drivers/usb/core/hub.c
	fs/aio.c
	fs/f2fs/namei.c
	kernel/power/swap.c

Change-Id: Ic871768afdde4511ca7dee3006c33c7d4607e280
2024-03-23 00:35:20 +02:00

696 lines
18 KiB
C

/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
* Copyright (c) 2016,2017 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/bpf.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include "map_in_map.h"
/* map_flags bits that array_map_alloc() accepts; any other bit is -EINVAL */
#define ARRAY_CREATE_FLAG_MASK (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
/* Release the per-cpu area referenced by every slot of @array->pptrs,
 * walking all map.max_entries slots and calling cond_resched() after each.
 */
static void bpf_array_free_percpu(struct bpf_array *array)
{
	int slot = 0;

	while (slot < array->map.max_entries) {
		free_percpu(array->pptrs[slot]);
		cond_resched();
		slot++;
	}
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
{
void __percpu *ptr;
int i;
for (i = 0; i < array->map.max_entries; i++) {
ptr = __alloc_percpu_gfp(array->elem_size, 8,
GFP_USER | __GFP_NOWARN);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
}
array->pptrs[i] = ptr;
cond_resched();
}
return 0;
}
/* Called from syscall.
 *
 * Validate @attr and allocate a BPF array map, either plain or per-cpu
 * depending on attr->map_type.
 *
 * Returns a pointer to the struct bpf_map embedded in the new bpf_array, or
 * an ERR_PTR():
 *   -EINVAL  max_entries or value_size is zero, key_size is not 4, map_flags
 *            has bits outside ARRAY_CREATE_FLAG_MASK, or a NUMA node was
 *            requested for a per-cpu array
 *   -E2BIG   value_size exceeds KMALLOC_MAX_SIZE, or the power-of-two
 *            rounding applied for callers without CAP_SYS_ADMIN wrapped u32
 *   -ENOMEM  the computed size reaches U32_MAX - PAGE_SIZE, or an allocation
 *            failed
 *   any negative value returned by bpf_map_precharge_memlock()
 */
static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
int ret, numa_node = bpf_map_attr_numa_node(attr);
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
u64 cost, array_size, mask64;
struct bpf_array *array;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0 ||
attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
(percpu && numa_node != NUMA_NO_NODE))
return ERR_PTR(-EINVAL);
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
*/
return ERR_PTR(-E2BIG);
/* every element occupies value_size rounded up to a multiple of 8 bytes */
elem_size = round_up(attr->value_size, 8);
max_entries = attr->max_entries;
/* On 32 bit archs roundup_pow_of_two() with max_entries that has
* uppermost bit set in u32 space is undefined behavior due to
* resulting 1U << 32, so do it manually here in u64 space.
*
* The result is index_mask = (smallest power of two >= max_entries) - 1.
*/
mask64 = fls_long(max_entries - 1);
mask64 = 1ULL << mask64;
mask64 -= 1;
index_mask = mask64;
if (unpriv) {
/* round up array size to nearest power of 2,
* since cpu will speculate within index_mask limits
*/
max_entries = index_mask + 1;
/* Check for overflows. */
if (max_entries < attr->max_entries)
return ERR_PTR(-E2BIG);
}
/* main allocation: the bpf_array header followed by either one pointer
* per entry (per-cpu) or the element values themselves (plain array)
*/
array_size = sizeof(*array);
if (percpu)
array_size += (u64) max_entries * sizeof(void *);
else
array_size += (u64) max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
if (percpu) {
/* per-cpu values are charged once per possible CPU, based on the
* caller's max_entries rather than the rounded-up count
*/
cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
}
/* convert the byte cost into a page count before charging it */
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
ret = bpf_map_precharge_memlock(cost);
if (ret < 0)
return ERR_PTR(ret);
/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
return ERR_PTR(-ENOMEM);
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;
/* copy mandatory map attributes */
array->map.map_type = attr->map_type;
array->map.key_size = attr->key_size;
array->map.value_size = attr->value_size;
/* the caller's max_entries is recorded, not the rounded-up local value */
array->map.max_entries = attr->max_entries;
array->map.map_flags = attr->map_flags;
array->map.numa_node = numa_node;
array->map.pages = cost;
array->elem_size = elem_size;
/* a failed per-cpu allocation has already released its partial work, so
* only the main area is left to free here
*/
if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
return &array->map;
}
/* Called from syscall or from eBPF program */
static void *array_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
if (unlikely(index >= array->map.max_entries))
return NULL;
return array->value + array->elem_size * (index & array->index_mask);
}
/* emit BPF instructions equivalent to C code of array_map_lookup_elem()
 *
 * The sequence is written to @insn_buf and the number of instructions
 * emitted is returned. Register use in the generated code: BPF_REG_1 is
 * treated as the map pointer, BPF_REG_2 as a pointer to the u32 key, and
 * BPF_REG_0 ends up holding the element address or 0.
 */
static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_insn *insn = insn_buf;
u32 elem_size = round_up(map->value_size, 8);
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
/* map_ptr now points at the first element (array->value) */
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
/* ret = 32-bit key loaded through the index register */
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
/* Out-of-range keys jump forward to the final "ret = 0". The jump offset
* counts the instructions in between, so the unpriv variant, which has
* the extra AND with index_mask, skips 4 instead of 3.
*/
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
}
/* ret = index * elem_size, using a shift when elem_size is a power of 2 */
if (is_power_of_2(elem_size)) {
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
} else {
*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
}
/* ret = address of the element */
*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
/* success path: jump over the "ret = 0" below */
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
/* target of the out-of-range jump */
*insn++ = BPF_MOV64_IMM(ret, 0);
return insn - insn_buf;
}
/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
if (unlikely(index >= array->map.max_entries))
return NULL;
return this_cpu_ptr(array->pptrs[index & array->index_mask]);
}
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
void __percpu *pptr;
int cpu, off = 0;
u32 size;
if (unlikely(index >= array->map.max_entries))
return -ENOENT;
/* per_cpu areas are zero-filled and bpf programs can only
* access 'value_size' of them, so copying rounded areas
* will not leak any kernel data
*/
size = round_up(map->value_size, 8);
rcu_read_lock();
pptr = array->pptrs[index & array->index_mask];
for_each_possible_cpu(cpu) {
bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
off += size;
}
rcu_read_unlock();
return 0;
}
/* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = key ? *(u32 *)key : U32_MAX;
u32 *next = (u32 *)next_key;
if (index >= array->map.max_entries) {
*next = 0;
return 0;
}
if (index == array->map.max_entries - 1)
return -ENOENT;
*next = index + 1;
return 0;
}
/* Called from syscall or from eBPF program.
 *
 * Overwrite the element selected by the u32 index at @key with
 * map->value_size bytes from @value. For BPF_MAP_TYPE_PERCPU_ARRAY only the
 * current CPU's copy is written.
 *
 * Returns 0 on success, -EINVAL for map_flags above BPF_EXIST, -E2BIG for
 * an index >= map.max_entries, -EEXIST when BPF_NOEXIST is requested.
 */
static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 idx = *(u32 *)key;
	void *dst;

	/* unknown flags */
	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;

	/* all elements were pre-allocated, cannot insert a new one */
	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	/* all elements already exist */
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	idx &= array->index_mask;
	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		dst = this_cpu_ptr(array->pptrs[idx]);
	else
		dst = array->value + array->elem_size * idx;

	memcpy(dst, value, map->value_size);
	return 0;
}
/* Write one per-cpu array element for every possible CPU.
 *
 * @key points at a u32 index. @value holds one chunk of
 * round_up(value_size, 8) bytes per possible CPU, laid out back to back in
 * for_each_possible_cpu() order.
 *
 * Returns 0 on success, -EINVAL for map_flags above BPF_EXIST, -E2BIG for
 * an index >= map.max_entries, -EEXIST when BPF_NOEXIST is requested.
 */
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
			    u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 idx = *(u32 *)key;
	void __percpu *slot;
	int cpu, offset;
	u32 chunk;

	/* unknown flags */
	if (unlikely(map_flags > BPF_EXIST))
		return -EINVAL;

	/* all elements were pre-allocated, cannot insert a new one */
	if (unlikely(idx >= array->map.max_entries))
		return -E2BIG;

	/* all elements already exist */
	if (unlikely(map_flags == BPF_NOEXIST))
		return -EEXIST;

	/* the user space will provide round_up(value_size, 8) bytes that
	 * will be copied into per-cpu area. bpf programs can only access
	 * value_size of it. During lookup the same extra bytes will be
	 * returned or zeros which were zero-filled by percpu_alloc,
	 * so no kernel data leaks possible
	 */
	chunk = round_up(map->value_size, 8);
	offset = 0;

	rcu_read_lock();
	slot = array->pptrs[idx & array->index_mask];
	for_each_possible_cpu(cpu) {
		bpf_long_memcpy(per_cpu_ptr(slot, cpu), value + offset, chunk);
		offset += chunk;
	}
	rcu_read_unlock();

	return 0;
}
/* Called from syscall or from eBPF program.
 *
 * Deletion is not implemented for this map: every call returns -EINVAL,
 * whatever @map and @key are.
 */
static int array_map_delete_elem(struct bpf_map *map, void *key)
{
return -EINVAL;
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void array_map_free(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs (can be more than one that used this map) were
* disconnected from events. Wait for outstanding programs to complete
* and free the array
*/
synchronize_rcu();
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
bpf_map_area_free(array);
}
/* Operations table for the array map whose values are stored inline. */
const struct bpf_map_ops array_map_ops = {
	/* lifetime */
	.map_alloc		= array_map_alloc,
	.map_free		= array_map_free,

	/* element access */
	.map_lookup_elem	= array_map_lookup_elem,
	.map_update_elem	= array_map_update_elem,
	.map_delete_elem	= array_map_delete_elem,
	.map_get_next_key	= array_map_get_next_key,

	/* instruction sequence mirroring array_map_lookup_elem() */
	.map_gen_lookup		= array_map_gen_lookup,
};
/* Operations table for the per-cpu array map. It shares alloc, free,
 * update, delete and get_next_key with array_map_ops, uses its own lookup,
 * and provides no map_gen_lookup.
 */
const struct bpf_map_ops percpu_array_map_ops = {
	/* lifetime */
	.map_alloc		= array_map_alloc,
	.map_free		= array_map_free,

	/* element access */
	.map_lookup_elem	= percpu_array_map_lookup_elem,
	.map_update_elem	= array_map_update_elem,
	.map_delete_elem	= array_map_delete_elem,
	.map_get_next_key	= array_map_get_next_key,
};
/* Allocate an fd-based array map. Anything other than a u32-sized value is
 * rejected with -EINVAL; otherwise creation is delegated to
 * array_map_alloc() and its result is returned unchanged.
 */
static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr)
{
	/* only file descriptors can be stored in this type of map */
	if (attr->value_size == sizeof(u32))
		return array_map_alloc(attr);

	return ERR_PTR(-EINVAL);
}
/* Free an fd-based array map after an RCU grace period. Every slot must
 * already be NULL; a non-NULL slot triggers BUG_ON().
 */
static void fd_array_map_free(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int slot = 0;

	synchronize_rcu();

	/* make sure it's empty */
	while (slot < array->map.max_entries) {
		BUG_ON(array->ptrs[slot]);
		slot++;
	}

	bpf_map_area_free(array);
}
/* Lookup hook for fd-based array maps: always returns NULL, so nothing
 * stored in the map is reachable through this path. @map and @key are
 * ignored.
 */
static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
{
return NULL;
}
/* only called from syscall
 *
 * Look up the slot selected by the u32 index at @key and, if it holds a
 * pointer, store the result of the map's map_fd_sys_lookup_elem() callback
 * for that pointer in @value.
 *
 * Returns 0 on success, -ENOTSUPP when the map has no
 * map_fd_sys_lookup_elem callback, -ENOENT when the index is out of range
 * or the slot is empty.
 */
int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
{
	void **slot, *ptr;
	int err = -ENOENT;

	if (!map->ops->map_fd_sys_lookup_elem)
		return -ENOTSUPP;

	rcu_read_lock();
	slot = array_map_lookup_elem(map, key);
	if (slot) {
		ptr = READ_ONCE(*slot);
		if (ptr) {
			*value = map->ops->map_fd_sys_lookup_elem(ptr);
			err = 0;
		}
	}
	rcu_read_unlock();

	return err;
}
/* only called from syscall
 *
 * Install the object behind the u32 file descriptor at @value into the slot
 * selected by the u32 index at @key. The fd is resolved through the map's
 * map_fd_get_ptr() callback; a pointer previously held by the slot is
 * handed to map_fd_put_ptr().
 *
 * Returns 0 on success, -EINVAL when map_flags is not BPF_ANY, -E2BIG when
 * the index is >= map.max_entries, or the error encoded in the pointer
 * returned by map_fd_get_ptr().
 */
int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
				 void *key, void *value, u64 map_flags)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 idx = *(u32 *)key;
	void *incoming, *previous;
	u32 ufd;

	if (map_flags != BPF_ANY)
		return -EINVAL;

	if (idx >= array->map.max_entries)
		return -E2BIG;

	ufd = *(u32 *)value;
	incoming = map->ops->map_fd_get_ptr(map, map_file, ufd);
	if (IS_ERR(incoming))
		return PTR_ERR(incoming);

	/* swap the new pointer in, then release whatever was there before */
	previous = xchg(&array->ptrs[idx], incoming);
	if (previous)
		map->ops->map_fd_put_ptr(map, previous, true);

	return 0;
}
/* Clear the slot selected by the u32 index at @key and pass the pointer it
 * held to the map's map_fd_put_ptr() callback.
 *
 * Returns 0 when a pointer was removed, -ENOENT when the slot was already
 * empty, -E2BIG when the index is >= map.max_entries.
 */
static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	u32 idx = *(u32 *)key;
	void *previous;

	if (idx >= array->map.max_entries)
		return -E2BIG;

	previous = xchg(&array->ptrs[idx], NULL);
	if (!previous)
		return -ENOENT;

	map->ops->map_fd_put_ptr(map, previous, true);
	return 0;
}
/* Resolve @fd to a bpf_prog for storing in a prog array.
 *
 * Returns the program on success. Returns the ERR_PTR from bpf_prog_get()
 * when the fd cannot be resolved, or ERR_PTR(-EINVAL) (after dropping the
 * program again) when bpf_prog_array_compatible() rejects it. @map_file is
 * not used.
 */
static void *prog_fd_array_get_ptr(struct bpf_map *map,
				   struct file *map_file, int fd)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_prog *prog;

	prog = bpf_prog_get(fd);
	if (IS_ERR(prog))
		return prog;

	if (bpf_prog_array_compatible(array, prog))
		return prog;

	bpf_prog_put(prog);
	return ERR_PTR(-EINVAL);
}
/*
 * map_fd_put_ptr callback for program arrays: drops the reference on the
 * program in @ptr with bpf_prog_put(). @map and @need_defer are accepted
 * to match the callback signature but are not used here.
 */
static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* bpf_prog is freed after one RCU or tasks trace grace period */
bpf_prog_put(ptr);
}
/*
 * map_fd_sys_lookup_elem callback for program arrays: @ptr is a stored
 * struct bpf_prog pointer, and the value returned is its aux->id.
 */
static u32 prog_fd_array_sys_lookup_elem(void *ptr)
{
	struct bpf_prog *prog = ptr;

	return prog->aux->id;
}
/*
 * Empty every slot of the fd array. Each stored pointer is released
 * through fd_array_map_delete_elem(), i.e. via the map's
 * map_fd_put_ptr() callback.
 */
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	int slot = 0;

	/* the return value (-ENOENT for empty slots) is intentionally unused */
	while (slot < array->map.max_entries) {
		fd_array_map_delete_elem(map, &slot);
		slot++;
	}
}
/*
 * Operations table for the program fd-array map. It provides no
 * .map_update_elem; .map_lookup_elem is the stub that always returns
 * NULL. All slots are cleared through .map_release_uref.
 */
const struct bpf_map_ops prog_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
.map_release_uref = bpf_fd_array_map_clear,
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
struct file *map_file)
{
struct bpf_event_entry *ee;
ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
if (ee) {
ee->event = perf_file->private_data;
ee->perf_file = perf_file;
ee->map_file = map_file;
}
return ee;
}
/*
 * RCU callback: recover the bpf_event_entry embedding @rcu, drop its
 * reference on the perf file and free the entry.
 */
static void __bpf_event_entry_free(struct rcu_head *rcu)
{
	struct bpf_event_entry *entry =
		container_of(rcu, struct bpf_event_entry, rcu);

	fput(entry->perf_file);
	kfree(entry);
}
/*
 * Schedule @ee to be released by __bpf_event_entry_free() through
 * call_rcu(), using the rcu head embedded in the entry.
 */
static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
{
call_rcu(&ee->rcu, __bpf_event_entry_free);
}
/*
 * map_fd_get_ptr callback for perf-event arrays.
 *
 * Resolves @fd with perf_event_get() and wraps the resulting file in a
 * bpf_event_entry tied to @map_file. On success the file reference
 * obtained here is owned by the returned entry.
 *
 * Returns the entry, the error pointer from perf_event_get(),
 * ERR_PTR(-EOPNOTSUPP) when perf_event_read_local() reports -EOPNOTSUPP
 * for the event, or ERR_PTR(-ENOMEM) when the entry cannot be allocated.
 * The file reference is dropped on the latter two paths.
 */
static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
					 struct file *map_file, int fd)
{
	struct bpf_event_entry *entry;
	struct perf_event *event;
	struct file *perf_file;
	u64 value;
	int err;

	perf_file = perf_event_get(fd);
	if (IS_ERR(perf_file))
		return perf_file;

	/* only -EOPNOTSUPP from the probe read rejects the event */
	event = perf_file->private_data;
	err = -EOPNOTSUPP;
	if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
		goto put_file;

	entry = bpf_event_entry_gen(perf_file, map_file);
	if (entry)
		return entry;

	err = -ENOMEM;
put_file:
	fput(perf_file);
	return ERR_PTR(err);
}
/*
 * map_fd_put_ptr callback for perf-event arrays: hands the
 * bpf_event_entry in @ptr to bpf_event_entry_free_rcu(). @map and
 * @need_defer are accepted to match the callback signature but are not
 * used here.
 */
static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* bpf_perf_event is freed after one RCU grace period */
bpf_event_entry_free_rcu(ptr);
}
/*
 * map_release callback for perf-event arrays.
 *
 * Walks every slot under rcu_read_lock() and deletes those entries whose
 * recorded map_file equals @map_file; entries installed through a
 * different map file are left alone.
 */
static void perf_event_fd_array_release(struct bpf_map *map,
					struct file *map_file)
{
	struct bpf_array *array = container_of(map, struct bpf_array, map);
	struct bpf_event_entry *entry;
	int slot;

	rcu_read_lock();
	for (slot = 0; slot < array->map.max_entries; slot++) {
		entry = READ_ONCE(array->ptrs[slot]);
		if (!entry || entry->map_file != map_file)
			continue;

		fd_array_map_delete_elem(map, &slot);
	}
	rcu_read_unlock();
}
/*
 * Operations table for the perf-event fd-array map. It has no
 * .map_fd_sys_lookup_elem, so bpf_fd_array_map_lookup_elem() returns
 * -ENOTSUPP for this map type. Entries are dropped per map file through
 * .map_release.
 */
const struct bpf_map_ops perf_event_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
.map_release = perf_event_fd_array_release,
};
#ifdef CONFIG_CGROUPS
/*
 * map_fd_get_ptr callback for cgroup arrays: returns whatever
 * cgroup_get_from_fd() yields for @fd. @map and @map_file are not used.
 */
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int fd)
{
return cgroup_get_from_fd(fd);
}
/*
 * map_fd_put_ptr callback for cgroup arrays: drops the cgroup reference
 * in @ptr with cgroup_put(). @map and @need_defer are accepted to match
 * the callback signature but are not used here.
 */
static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
{
/* cgroup_put free cgrp after a rcu grace period */
cgroup_put(ptr);
}
/*
 * map_free callback for cgroup arrays. The slots must be cleared first:
 * fd_array_map_free() BUG()s on any slot that is still non-NULL.
 */
static void cgroup_fd_array_free(struct bpf_map *map)
{
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
}
/*
 * Operations table for the cgroup fd-array map. Unlike the prog and
 * perf-event tables, it clears its slots from .map_free itself
 * (cgroup_fd_array_free) and has no release hook.
 */
const struct bpf_map_ops cgroup_array_map_ops = {
.map_alloc = fd_array_map_alloc,
.map_free = cgroup_fd_array_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = cgroup_fd_array_get_ptr,
.map_fd_put_ptr = cgroup_fd_array_put_ptr,
};
#endif
/*
 * Allocate an array-of-maps map.
 *
 * The inner-map metadata is built from attr->inner_map_fd before the
 * outer fd-array is allocated. On success the metadata is attached to
 * the new map; if the outer allocation fails, the metadata is freed and
 * the allocation's error pointer is returned. A failure to build the
 * metadata is returned as-is.
 */
static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
{
	struct bpf_map *meta, *outer;

	meta = bpf_map_meta_alloc(attr->inner_map_fd);
	if (IS_ERR(meta))
		return meta;

	outer = fd_array_map_alloc(attr);
	if (!IS_ERR(outer)) {
		outer->inner_map_meta = meta;
		return outer;
	}

	bpf_map_meta_free(meta);
	return outer;
}
/*
 * map_free callback for array-of-maps: frees the inner-map metadata,
 * empties every slot and then frees the array itself. The slots must be
 * cleared before fd_array_map_free(), which BUG()s on any slot that is
 * still non-NULL.
 */
static void array_of_map_free(struct bpf_map *map)
{
/* map->inner_map_meta is only accessed by syscall which
 * is protected by fdget/fdput.
 */
bpf_map_meta_free(map->inner_map_meta);
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
}
/*
 * map_lookup_elem callback for array-of-maps.
 *
 * Returns the inner map pointer currently stored in the slot selected by
 * @key (read once), or NULL when array_map_lookup_elem() yields no slot.
 * The returned pointer may itself be NULL if the slot is empty.
 */
static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_map **slot = array_map_lookup_elem(map, key);

	return slot ? READ_ONCE(*slot) : NULL;
}
/*
 * map_gen_lookup callback for array-of-maps: writes an inline instruction
 * sequence for the lookup into @insn_buf and returns how many
 * instructions were emitted.
 *
 * The emitted sequence uses BPF_REG_1 as the map pointer, BPF_REG_2 as
 * the pointer to the key, and leaves the result in BPF_REG_0: the 64-bit
 * value loaded from the selected slot, or 0 when the index is out of
 * range or the slot holds 0.
 *
 * The jump offsets are hand-counted relative to the instructions that
 * follow; adding or removing an instruction requires adjusting them.
 */
static u32 array_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn_buf)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 elem_size = round_up(map->value_size, 8);
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
const int map_ptr = BPF_REG_1;
const int index = BPF_REG_2;
/* map_ptr now points at the start of the value area */
*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
/* ret = 32-bit index read through the key pointer */
*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
/*
 * Out-of-range index: jump to the final "ret = 0". The unpriv path
 * jumps one instruction further (6 vs 5) because it emits the extra
 * AND below.
 */
if (map->unpriv_array) {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
/* NOTE(review): presumably speculative-execution hardening; confirm where index_mask is set */
*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
} else {
*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
}
/* ret = index * elem_size, using a shift when elem_size is a power of two */
if (is_power_of_2(elem_size))
*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
else
*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
/* ret = address of the slot, then ret = 64-bit contents of the slot */
*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
/* empty slot: fall into "ret = 0"; otherwise skip over it */
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
*insn++ = BPF_MOV64_IMM(ret, 0);
return insn - insn_buf;
}
/*
 * Operations table for the array-of-maps map. It uses the shared
 * fd-array delete path and the bpf_map_fd_* callbacks for fd handling,
 * with its own alloc, free, lookup and inline-lookup generator.
 */
const struct bpf_map_ops array_of_maps_map_ops = {
.map_alloc = array_of_map_alloc,
.map_free = array_of_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = array_of_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = bpf_map_fd_get_ptr,
.map_fd_put_ptr = bpf_map_fd_put_ptr,
.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
.map_gen_lookup = array_of_map_gen_lookup,
};