Michael Bestas a6c3ee045b Merge branch 'linux-4.14.y' of github.com:openela/kernel-lts into android13-4.14-msmnile
* 'linux-4.14.y' of github.com:openela/kernel-lts:
  LTS: Update to 4.14.355
  Revert "parisc: Use irq_enter_rcu() to fix warning at kernel/context_tracking.c:367"
  netns: restore ops before calling ops_exit_list
  cx82310_eth: fix error return code in cx82310_bind()
  rtmutex: Drop rt_mutex::wait_lock before scheduling
  locking/rtmutex: Handle non enqueued waiters gracefully in remove_waiter()
  drm/i915/fence: Mark debug_fence_free() with __maybe_unused
  ACPI: processor: Fix memory leaks in error paths of processor_add()
  ACPI: processor: Return an error if acpi_processor_get_info() fails in processor_add()
  netns: add pre_exit method to struct pernet_operations
  net: Add comment about pernet_operations methods and synchronization
  nilfs2: protect references to superblock parameters exposed in sysfs
  nilfs2: replace snprintf in show functions with sysfs_emit
  nilfs2: use time64_t internally
  tracing: Avoid possible softlockup in tracing_iter_reset()
  ring-buffer: Rename ring_buffer_read() to read_buffer_iter_advance()
  uprobes: Use kzalloc to allocate xol area
  clocksource/drivers/imx-tpm: Fix next event not taking effect sometime
  clocksource/drivers/imx-tpm: Fix return -ETIME when delta exceeds INT_MAX
  VMCI: Fix use-after-free when removing resource in vmci_resource_remove()
  nvmem: Fix return type of devm_nvmem_device_get() in kerneldoc
  iio: fix scale application in iio_convert_raw_to_processed_unlocked
  iio: buffer-dmaengine: fix releasing dma channel on error
  ata: pata_macio: Use WARN instead of BUG
  of/irq: Prevent device address out-of-bounds read in interrupt map walk
  Squashfs: sanity check symbolic link size
  usbnet: ipheth: race between ipheth_close and error handling
  Input: uinput - reject requests with unreasonable number of slots
  btrfs: initialize location to fix -Wmaybe-uninitialized in btrfs_lookup_dentry()
  PCI: Add missing bridge lock to pci_bus_lock()
  btrfs: clean up our handling of refs == 0 in snapshot delete
  btrfs: replace BUG_ON with ASSERT in walk_down_proc()
  smp: Add missing destroy_work_on_stack() call in smp_call_on_cpu()
  wifi: mwifiex: Do not return unused priv in mwifiex_get_priv_by_id()
  hwmon: (w83627ehf) Fix underflows seen when writing limit attributes
  hwmon: (nct6775-core) Fix underflows seen when writing limit attributes
  hwmon: (lm95234) Fix underflows seen when writing limit attributes
  hwmon: (adc128d818) Fix underflows seen when writing limit attributes
  pci/hotplug/pnv_php: Fix hotplug driver crash on Powernv
  devres: Initialize an uninitialized struct member
  um: line: always fill *error_out in setup_one_line()
  cgroup: Protect css->cgroup write under css_set_lock
  iommu/vt-d: Handle volatile descriptor status read
  rfkill: fix spelling mistake contidion to condition
  usbnet: modern method to get random MAC
  net: usb: don't write directly to netdev->dev_addr
  drivers/net/usb: Remove all strcpy() uses
  cx82310_eth: re-enable ethernet mode after router reboot
  igb: Fix not clearing TimeSync interrupts for 82580
  can: bcm: Remove proc entry when dev is unregistered.
  pcmcia: Use resource_size function on resource object
  media: qcom: camss: Add check for v4l2_fwnode_endpoint_parse
  wifi: brcmsmac: advertise MFP_CAPABLE to enable WPA3
  af_unix: Remove put_pid()/put_cred() in copy_peercred().
  irqchip/armada-370-xp: Do not allow mapping IRQ 0 and 1
  smack: unix sockets: fix accept()ed socket label
  ALSA: hda: Add input value sanity checks to HDMI channel map controls
  nilfs2: fix state management in error path of log writing function
  nilfs2: fix missing cleanup on rollforward recovery error
  fuse: use unsigned type for getxattr/listxattr size truncation
  mmc: dw_mmc: Fix IDMAC operation with pages bigger than 4K
  ALSA: hda/conexant: Add pincfg quirk to enable top speakers on Sirius devices
  sch/netem: fix use after free in netem_dequeue
  ALSA: usb-audio: Fix gpf in snd_usb_pipe_sanity_check
  ALSA: usb-audio: Sanity checks for each pipe and EP types
  ALSA: usb-audio: add boot quirk for Axe-Fx III
  udf: Limit file size to 4TB
  block: initialize integrity buffer to zero before writing it to media
  media: uvcvideo: Enforce alignment of frame and interval
  smack: tcp: ipv4, fix incorrect labeling
  usbip: Don't submit special requests twice
  apparmor: fix possible NULL pointer dereference
  drm/amdgpu: fix mc_data out-of-bounds read warning
  drm/amdgpu: fix ucode out-of-bounds read warning
  drm/amdgpu: fix overflowed array index read warning
  drm/amdgpu: Fix uninitialized variable warning in amdgpu_afmt_acr
  usb: dwc3: st: add missing depopulate in probe error path
  usb: dwc3: st: Add of_node_put() before return in probe function
  net: usb: qmi_wwan: add MeiG Smart SRM825L
  LTS: Update to 4.14.354
  drm/fb-helper: set x/yres_virtual in drm_fb_helper_check_var
  ipc: remove memcg accounting for sops objects in do_semtimedop()
  scsi: aacraid: Fix double-free on probe failure
  usb: core: sysfs: Unmerge @usb3_hardware_lpm_attr_group in remove_power_attributes()
  usb: dwc3: st: fix probed platform device ref count on probe error path
  usb: dwc3: core: Prevent USB core invalid event buffer address access
  usb: dwc3: omap: add missing depopulate in probe error path
  USB: serial: option: add MeiG Smart SRM825L
  cdc-acm: Add DISABLE_ECHO quirk for GE HealthCare UI Controller
  net: busy-poll: use ktime_get_ns() instead of local_clock()
  gtp: fix a potential NULL pointer dereference
  net: prevent mss overflow in skb_segment()
  ida: Fix crash in ida_free when the bitmap is empty
  net:rds: Fix possible deadlock in rds_message_put
  fbmem: Check virtual screen sizes in fb_set_var()
  fbcon: Prevent that screen size is smaller than font size
  printk: Export is_console_locked
  memcg: enable accounting of ipc resources
  cgroup/cpuset: Prevent UAF in proc_cpuset_show()
  media: uvcvideo: Fix integer overflow calculating timestamp
  media: uvcvideo: Use ktime_t for timestamps
  filelock: Correct the filelock owner in fcntl_setlk/fcntl_setlk64
  scsi: mpt3sas: Avoid IOMMU page faults on REPORT ZONES
  dm suspend: return -ERESTARTSYS instead of -EINTR
  wifi: mwifiex: duplicate static structs used in driver instances
  pinctrl: single: fix potential NULL dereference in pcs_get_function()
  drm/amdgpu: Using uninitialized value *size when calling amdgpu_vce_cs_reloc
  Input: MT - limit max slots
  Bluetooth: hci_ldisc: check HCI_UART_PROTO_READY flag in HCIUARTGETPROTO
  ALSA: timer: Relax start tick time check for slave timer elements
  mmc: dw_mmc: allow biu and ciu clocks to defer
  HID: wacom: Defer calculation of resolution until resolution_code is known
  Bluetooth: MGMT: Add error handling to pair_device()
  mmc: mmc_test: Fix NULL dereference on allocation failure
  net: xilinx: axienet: Always disable promiscuous mode
  ipv6: prevent UAF in ip6_send_skb()
  netfilter: nft_counter: Synchronize nft_counter_reset() against reader.
  kcm: Serialise kcm_sendmsg() for the same socket.
  Bluetooth: hci_core: Fix LE quote calculation
  Bluetooth: hci_core: Fix not handling link timeouts propertly
  Bluetooth: Make use of __check_timeout on hci_sched_le
  block: use "unsigned long" for blk_validate_block_size().
  gtp: pull network headers in gtp_dev_xmit()
  hrtimer: Prevent queuing of hrtimer without a function callback
  nvmet-rdma: fix possible bad dereference when freeing rsps
  ext4: set the type of max_zeroout to unsigned int to avoid overflow
  irqchip/gic-v3-its: Remove BUG_ON in its_vpe_irq_domain_alloc
  usb: dwc3: core: Skip setting event buffers for host only controllers
  s390/iucv: fix receive buffer virtual vs physical address confusion
  openrisc: Call setup_memory() earlier in the init sequence
  NFS: avoid infinite loop in pnfs_update_layout.
  Bluetooth: bnep: Fix out-of-bound access
  usb: gadget: fsl: Increase size of name buffer for endpoints
  f2fs: fix to do sanity check in update_sit_entry
  btrfs: delete pointless BUG_ON check on quota root in btrfs_qgroup_account_extent()
  btrfs: send: handle unexpected data in header buffer in begin_cmd()
  btrfs: handle invalid root reference found in may_destroy_subvol()
  btrfs: change BUG_ON to assertion when checking for delayed_node root
  powerpc/boot: Only free if realloc() succeeds
  powerpc/boot: Handle allocation failure in simple_realloc()
  parisc: Use irq_enter_rcu() to fix warning at kernel/context_tracking.c:367
  md: clean up invalid BUG_ON in md_ioctl
  net/sun3_82586: Avoid reading past buffer in debug output
  scsi: lpfc: Initialize status local variable in lpfc_sli4_repost_sgl_list()
  fs: binfmt_elf_efpic: don't use missing interpreter's properties
  media: pci: cx23885: check cx23885_vdev_init() return
  quota: Remove BUG_ON from dqget()
  ext4: do not trim the group with corrupted block bitmap
  powerpc/xics: Check return value of kasprintf in icp_native_map_one_cpu
  wifi: iwlwifi: abort scan when rfkill on but device enabled
  gfs2: setattr_chown: Add missing initialization
  scsi: spi: Fix sshdr use
  binfmt_misc: cleanup on filesystem umount
  staging: ks7010: disable bh on tx_dev_lock
  wifi: cw1200: Avoid processing an invalid TIM IE
  ssb: Fix division by zero issue in ssb_calc_clock_rate
  atm: idt77252: prevent use after free in dequeue_rx()
  btrfs: rename bitmap_set_bits() -> btrfs_bitmap_set_bits()
  overflow: Implement size_t saturating arithmetic helpers
  overflow.h: Add flex_array_size() helper
  s390/cio: rename bitmap_size() -> idset_bitmap_size()
  memcg_write_event_control(): fix a user-triggerable oops
  drm/amdgpu: Actually check flags for all context ops.
  selinux: fix potential counting error in avc_add_xperms_decision()
  include/linux/bitmap.h: make bitmap_fill() and bitmap_zero() consistent
  dm persistent data: fix memory allocation failure
  dm resume: don't return EINVAL when signalled
  ALSA: usb-audio: Support Yamaha P-125 quirk entry
  fuse: Initialize beyond-EOF page contents before setting uptodate

 Conflicts:
	drivers/mmc/core/mmc_test.c
	drivers/net/usb/usbnet.c

Change-Id: I718c5312df474678aea17713b30a8a669bdae348
2024-11-04 03:54:51 +02:00

/*
 * Generic helpers for smp ipi calls
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/irq_work.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/sched/idle.h>
#include <linux/hypervisor.h>
#include <linux/suspend.h>
#include "smpboot.h"

enum {
	CSD_FLAG_LOCK		= 0x01,
	CSD_FLAG_SYNCHRONOUS	= 0x02,
};

struct call_function_data {
	call_single_data_t	__percpu *csd;
	cpumask_var_t		cpumask;
	cpumask_var_t		cpumask_ipi;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
static void flush_smp_call_function_queue(bool warn_cpu_offline);
/* CPU mask indicating which CPUs to bring online during smp_init() */
static bool have_boot_cpu_mask;
static cpumask_var_t boot_cpu_mask;

int smpcfd_prepare_cpu(unsigned int cpu)
{
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
				     cpu_to_node(cpu)))
		return -ENOMEM;
	if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
				     cpu_to_node(cpu))) {
		free_cpumask_var(cfd->cpumask);
		return -ENOMEM;
	}
	cfd->csd = alloc_percpu(call_single_data_t);
	if (!cfd->csd) {
		free_cpumask_var(cfd->cpumask);
		free_cpumask_var(cfd->cpumask_ipi);
		return -ENOMEM;
	}

	return 0;
}

int smpcfd_dead_cpu(unsigned int cpu)
{
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	free_cpumask_var(cfd->cpumask);
	free_cpumask_var(cfd->cpumask_ipi);
	free_percpu(cfd->csd);
	return 0;
}

int smpcfd_dying_cpu(unsigned int cpu)
{
	/*
	 * The IPIs for the smp-call-function callbacks queued by other
	 * CPUs might arrive late, either due to hardware latencies or
	 * because this CPU disabled interrupts (inside stop-machine)
	 * before the IPIs were sent. So flush out any pending callbacks
	 * explicitly (without waiting for the IPIs to arrive), to
	 * ensure that the outgoing CPU doesn't go offline with work
	 * still pending.
	 */
	flush_smp_call_function_queue(false);
	return 0;
}

void __init call_function_init(void)
{
	int i;

	for_each_possible_cpu(i)
		init_llist_head(&per_cpu(call_single_queue, i));

	smpcfd_prepare_cpu(smp_processor_id());
}

/*
 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
 *
 * For non-synchronous ipi calls the csd can still be in use by the
 * previous function call. For multi-cpu calls it's even more interesting
 * as we'll have to ensure no other cpu is observing our csd.
 */
static __always_inline void csd_lock_wait(struct __call_single_data *csd)
{
	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
}

static __always_inline void csd_lock(struct __call_single_data *csd)
{
	csd_lock_wait(csd);
	csd->flags |= CSD_FLAG_LOCK;

	/*
	 * prevent CPU from reordering the above assignment
	 * to ->flags with any subsequent assignments to other
	 * fields of the specified call_single_data_t structure:
	 */
	smp_wmb();
}

static __always_inline void csd_unlock(struct __call_single_data *csd)
{
	WARN_ON(!(csd->flags & CSD_FLAG_LOCK));

	/*
	 * ensure we're all done before releasing data:
	 */
	smp_store_release(&csd->flags, 0);
}

static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);

/*
 * Insert a previously allocated call_single_data_t element
 * for execution on the given CPU. data must already have
 * ->func, ->info, and ->flags set.
 */
static int generic_exec_single(int cpu, struct __call_single_data *csd,
			       smp_call_func_t func, void *info)
{
	if (cpu == smp_processor_id()) {
		unsigned long flags;

		/*
		 * We can unlock early even for the synchronous on-stack case,
		 * since we're doing this from the same CPU..
		 */
		csd_unlock(csd);
		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
		return 0;
	}

	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
		csd_unlock(csd);
		return -ENXIO;
	}

	csd->func = func;
	csd->info = info;

	/*
	 * The list addition should be visible before sending the IPI
	 * handler locks the list to pull the entry off it because of
	 * normal cache coherency rules implied by spinlocks.
	 *
	 * If IPIs can go out of order to the cache coherency protocol
	 * in an architecture, sufficient synchronisation should be added
	 * to arch code to make it appear to obey cache coherency WRT
	 * locking and barrier primitives. Generic code isn't really
	 * equipped to do the right thing...
	 */
	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
		arch_send_call_function_single_ipi(cpu);

	return 0;
}

/**
 * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
 *
 * Invoked by arch to handle an IPI for call function single.
 * Must be called with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
	flush_smp_call_function_queue(true);
}

/**
 * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
 *
 * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
 *		      offline CPU. Skip this check if set to 'false'.
 *
 * Flush any pending smp-call-function callbacks queued on this CPU. This is
 * invoked by the generic IPI handler, as well as by a CPU about to go offline,
 * to ensure that all pending IPI callbacks are run before it goes completely
 * offline.
 *
 * Loop through the call_single_queue and run all the queued callbacks.
 * Must be called with interrupts disabled.
 */
static void flush_smp_call_function_queue(bool warn_cpu_offline)
{
	struct llist_head *head;
	struct llist_node *entry;
	call_single_data_t *csd, *csd_next;
	static bool warned;

	WARN_ON(!irqs_disabled());

	head = this_cpu_ptr(&call_single_queue);
	entry = llist_del_all(head);
	entry = llist_reverse_order(entry);

	/* There shouldn't be any pending callbacks on an offline CPU. */
	if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
		     !warned && entry != NULL)) {
		warned = true;
		WARN(1, "IPI on offline CPU %d\n", smp_processor_id());

		/*
		 * We don't have to use the _safe() variant here
		 * because we are not invoking the IPI handlers yet.
		 */
		llist_for_each_entry(csd, entry, llist)
			pr_warn("IPI callback %pS sent to offline CPU\n",
				csd->func);
	}

	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
		smp_call_func_t func = csd->func;
		void *info = csd->info;

		/* Do we wait until *after* callback? */
		if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
			func(info);
			csd_unlock(csd);
		} else {
			csd_unlock(csd);
			func(info);
		}
	}

	/*
	 * Handle irq works queued remotely by irq_work_queue_on().
	 * Smp functions above are typically synchronous so they
	 * better run first since some other CPUs may be busy waiting
	 * for them.
	 */
	irq_work_run();
}

/*
 * smp_call_function_single - Run a function on a specific CPU
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 */
int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
			     int wait)
{
	call_single_data_t *csd;
	call_single_data_t csd_stack = {
		.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
	};
	int this_cpu;
	int err;

	/*
	 * prevent preemption and reschedule on another processor,
	 * as well as CPU removal
	 */
	this_cpu = get_cpu();

	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow cpu's that are not yet online though, as no one else can
	 * send smp call function interrupt to this cpu and as such deadlocks
	 * can't happen.
	 */
	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
		     && !oops_in_progress);

	csd = &csd_stack;
	if (!wait) {
		csd = this_cpu_ptr(&csd_data);
		csd_lock(csd);
	}

	err = generic_exec_single(cpu, csd, func, info);

	if (wait)
		csd_lock_wait(csd);

	put_cpu();

	return err;
}
EXPORT_SYMBOL(smp_call_function_single);
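
/*
 * Illustrative usage (a sketch, not part of this file): sample a value on
 * another CPU and wait for the result. read_counter(), sample_cpu() and
 * struct counter_sample are hypothetical names.
 *
 *	struct counter_sample {
 *		u64 value;
 *	};
 *
 *	static void read_counter(void *info)
 *	{
 *		struct counter_sample *s = info;
 *
 *		s->value = jiffies;	// stands in for a per-CPU read
 *	}
 *
 *	static u64 sample_cpu(int cpu)
 *	{
 *		struct counter_sample s;
 *
 *		if (smp_call_function_single(cpu, read_counter, &s, 1))
 *			return 0;	// CPU offline or invalid
 *		return s.value;
 *	}
 *
 * With wait=1 the on-stack @s is safe: the call only returns after
 * read_counter() has finished on @cpu.
 */
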
/**
 * smp_call_function_single_async(): Run an asynchronous function on a
 *				     specific CPU.
 * @cpu: The CPU to run on.
 * @csd: Pre-allocated and setup data structure
 *
 * Like smp_call_function_single(), but the call is asynchronous and
 * can thus be done from contexts with disabled interrupts.
 *
 * The caller passes its own pre-allocated data structure
 * (ie: embedded in an object) and is responsible for synchronizing it
 * such that the IPIs performed on the @csd are strictly serialized.
 *
 * NOTE: Be careful, there is unfortunately no current debugging facility to
 * validate the correctness of this serialization.
 */
int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
{
	int err = 0;

	preempt_disable();

	/* We could deadlock if we have to wait here with interrupts disabled! */
	if (WARN_ON_ONCE(csd->flags & CSD_FLAG_LOCK))
		csd_lock_wait(csd);

	csd->flags = CSD_FLAG_LOCK;
	smp_wmb();

	err = generic_exec_single(cpu, csd, csd->func, csd->info);
	preempt_enable();

	return err;
}
EXPORT_SYMBOL_GPL(smp_call_function_single_async);
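
/*
 * Illustrative usage (a sketch, not part of this file): an object embeds its
 * own call_single_data_t so an IPI can be posted from a context that has
 * interrupts disabled. struct my_dev and my_dev_poke() are hypothetical.
 *
 *	struct my_dev {
 *		call_single_data_t csd;
 *		// ...
 *	};
 *
 *	// once, at init time:
 *	dev->csd.func = my_dev_poke;
 *	dev->csd.info = dev;
 *
 *	// later, even with IRQs off, provided the previous call completed:
 *	smp_call_function_single_async(target_cpu, &dev->csd);
 *
 * Reuse of the csd must be serialized by the caller; the WARN_ON_ONCE in
 * the function only detects a still-locked csd after the fact.
 */
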
/*
 * smp_call_function_any - Run a function on any of the given cpus
 * @mask: The mask of cpus it can run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed.
 *
 * Returns 0 on success, else a negative status code (if no cpus were online).
 *
 * Selection preference:
 *	1) current cpu if in @mask
 *	2) any cpu of current node if in @mask
 *	3) any other online cpu in @mask
 */
int smp_call_function_any(const struct cpumask *mask,
			  smp_call_func_t func, void *info, int wait)
{
	unsigned int cpu;
	const struct cpumask *nodemask;
	int ret;

	/* Try for same CPU (cheapest) */
	cpu = get_cpu();
	if (cpumask_test_cpu(cpu, mask))
		goto call;

	/* Try for same node. */
	nodemask = cpumask_of_node(cpu_to_node(cpu));
	for (cpu = cpumask_first_and(nodemask, mask); cpu < nr_cpu_ids;
	     cpu = cpumask_next_and(cpu, nodemask, mask)) {
		if (cpu_online(cpu))
			goto call;
	}

	/* Any online will do: smp_call_function_single handles nr_cpu_ids. */
	cpu = cpumask_any_and(mask, cpu_online_mask);
call:
	ret = smp_call_function_single(cpu, func, info, wait);
	put_cpu();
	return ret;
}
EXPORT_SYMBOL_GPL(smp_call_function_any);
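
/*
 * Illustrative usage (a sketch, not part of this file): run a probe on the
 * cheapest reachable CPU of a package, preferring the local CPU and then
 * the local node. do_probe() and pkg_mask are hypothetical.
 *
 *	err = smp_call_function_any(pkg_mask, do_probe, &result, 1);
 *	if (err)
 *		;	// no online CPU in pkg_mask
 */
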
/**
 * smp_call_function_many(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
void smp_call_function_many(const struct cpumask *mask,
			    smp_call_func_t func, void *info, bool wait)
{
	struct call_function_data *cfd;
	int cpu, next_cpu, this_cpu = smp_processor_id();

	/*
	 * Can deadlock when called with interrupts disabled.
	 * We allow cpu's that are not yet online though, as no one else can
	 * send smp call function interrupt to this cpu and as such deadlocks
	 * can't happen.
	 */
	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
		     && !oops_in_progress && !early_boot_irqs_disabled);

	/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
	cpu = cpumask_first_and(mask, cpu_online_mask);
	if (cpu == this_cpu)
		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);

	/* No online cpus? We're done. */
	if (cpu >= nr_cpu_ids)
		return;

	/* Do we have another CPU which isn't us? */
	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (next_cpu == this_cpu)
		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

	/* Fastpath: do that cpu by itself. */
	if (next_cpu >= nr_cpu_ids) {
		smp_call_function_single(cpu, func, info, wait);
		return;
	}

	cfd = this_cpu_ptr(&cfd_data);

	cpumask_and(cfd->cpumask, mask, cpu_online_mask);
	__cpumask_clear_cpu(this_cpu, cfd->cpumask);

	/* Some callers race with other cpus changing the passed mask */
	if (unlikely(!cpumask_weight(cfd->cpumask)))
		return;

	cpumask_clear(cfd->cpumask_ipi);
	for_each_cpu(cpu, cfd->cpumask) {
		call_single_data_t *csd = per_cpu_ptr(cfd->csd, cpu);

		csd_lock(csd);
		if (wait)
			csd->flags |= CSD_FLAG_SYNCHRONOUS;
		csd->func = func;
		csd->info = info;
		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
			__cpumask_set_cpu(cpu, cfd->cpumask_ipi);
	}

	/* Send a message to all CPUs in the map */
	arch_send_call_function_ipi_mask(cfd->cpumask_ipi);

	if (wait) {
		for_each_cpu(cpu, cfd->cpumask) {
			call_single_data_t *csd;

			csd = per_cpu_ptr(cfd->csd, cpu);
			csd_lock_wait(csd);
		}
	}
}
EXPORT_SYMBOL(smp_call_function_many);
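
/*
 * Illustrative usage (a sketch, not part of this file): broadcast a
 * TLB-flush-style operation to a set of CPUs and wait for completion.
 * flush_local() and target_mask are hypothetical.
 *
 *	preempt_disable();	// required by smp_call_function_many()
 *	smp_call_function_many(target_mask, flush_local, NULL, true);
 *	preempt_enable();
 *
 * The calling CPU is never included: if it is in @mask it is skipped, so a
 * caller that also needs the local effect must invoke flush_local() itself
 * (or use on_each_cpu_mask() below).
 */
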
/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * Returns 0.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function(smp_call_func_t func, void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();

	return 0;
}
EXPORT_SYMBOL(smp_call_function);

/* Setup configured maximum number of CPUs to activate */
unsigned int setup_max_cpus = NR_CPUS;
EXPORT_SYMBOL(setup_max_cpus);

/*
 * Setup routine for controlling SMP activation
 *
 * Command-line option of "nosmp" or "maxcpus=0" will disable SMP
 * activation entirely (the MPS table probe still happens, though).
 *
 * Command-line option of "maxcpus=<NUM>", where <NUM> is an integer
 * greater than 0, limits the maximum number of CPUs activated in
 * SMP mode to <NUM>.
 */

void __weak arch_disable_smp_support(void) { }

static int __init nosmp(char *str)
{
	setup_max_cpus = 0;
	arch_disable_smp_support();

	return 0;
}

early_param("nosmp", nosmp);

/* this is hard limit */
static int __init nrcpus(char *str)
{
	int nr_cpus;

	get_option(&str, &nr_cpus);
	if (nr_cpus > 0 && nr_cpus < nr_cpu_ids)
		nr_cpu_ids = nr_cpus;

	return 0;
}

early_param("nr_cpus", nrcpus);

static int __init maxcpus(char *str)
{
	get_option(&str, &setup_max_cpus);
	if (setup_max_cpus == 0)
		arch_disable_smp_support();

	return 0;
}

early_param("maxcpus", maxcpus);

static int __init boot_cpus(char *str)
{
	alloc_bootmem_cpumask_var(&boot_cpu_mask);
	if (cpulist_parse(str, boot_cpu_mask) < 0) {
		pr_warn("SMP: Incorrect boot_cpus cpumask\n");
		return -EINVAL;
	}
	have_boot_cpu_mask = true;
	return 0;
}
early_param("boot_cpus", boot_cpus);

/* Setup number of possible processor ids */
unsigned int nr_cpu_ids __read_mostly = NR_CPUS;
EXPORT_SYMBOL(nr_cpu_ids);

/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
void __init setup_nr_cpu_ids(void)
{
	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1;
}

static inline bool boot_cpu(int cpu)
{
	if (!have_boot_cpu_mask)
		return true;

	return cpumask_test_cpu(cpu, boot_cpu_mask);
}

static inline void free_boot_cpu_mask(void)
{
	if (have_boot_cpu_mask)	/* Allocated from boot_cpus() */
		free_bootmem_cpumask_var(boot_cpu_mask);
}

/* Called by boot processor to activate the rest. */
void __init smp_init(void)
{
	int num_nodes, num_cpus;
	unsigned int cpu;

	idle_threads_init();
	cpuhp_threads_init();

	pr_info("Bringing up secondary CPUs ...\n");

	/* FIXME: This should be done in userspace --RR */
	for_each_present_cpu(cpu) {
		if (num_online_cpus() >= setup_max_cpus)
			break;
		if (!cpu_online(cpu) && boot_cpu(cpu))
			cpu_up(cpu);
	}

	free_boot_cpu_mask();

	num_nodes = num_online_nodes();
	num_cpus = num_online_cpus();
	pr_info("Brought up %d node%s, %d CPU%s\n",
		num_nodes, (num_nodes > 1 ? "s" : ""),
		num_cpus, (num_cpus > 1 ? "s" : ""));

	/* Any cleanup work */
	smp_cpus_done(setup_max_cpus);
}

/*
 * Call a function on all processors. May be used during early boot while
 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
 * of local_irq_disable/enable().
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
	unsigned long flags;
	int ret = 0;

	preempt_disable();
	ret = smp_call_function(func, info, wait);
	local_irq_save(flags);
	func(info);
	local_irq_restore(flags);
	preempt_enable();
	return ret;
}
EXPORT_SYMBOL(on_each_cpu);
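
/*
 * Illustrative usage (a sketch, not part of this file); bump_stat() and the
 * hits counter are hypothetical:
 *
 *	static atomic_t hits;
 *
 *	static void bump_stat(void *info)
 *	{
 *		atomic_inc(info);
 *	}
 *
 *	on_each_cpu(bump_stat, &hits, 1);
 *
 * Unlike smp_call_function(), this also runs @func on the calling CPU,
 * with interrupts disabled around the local invocation.
 */
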
/**
 * on_each_cpu_mask(): Run a function on processors specified by
 * cpumask, which may include the local processor.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. The
 * exception is that it may be used during early boot while
 * early_boot_irqs_disabled is set.
 */
void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
		      void *info, bool wait)
{
	int cpu = get_cpu();

	smp_call_function_many(mask, func, info, wait);
	if (cpumask_test_cpu(cpu, mask)) {
		unsigned long flags;

		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
	}
	put_cpu();
}
EXPORT_SYMBOL(on_each_cpu_mask);

/*
 * on_each_cpu_cond(): Call a function on each processor for which
 * the supplied function cond_func returns true, optionally waiting
 * for all the required CPUs to finish. This may include the local
 * processor.
 * @cond_func:	A callback function that is passed a cpu id and
 *		the info parameter. The function is called
 *		with preemption disabled. The function should
 *		return a boolean value indicating whether to IPI
 *		the specified CPU.
 * @func:	The function to run on all applicable CPUs.
 *		This must be fast and non-blocking.
 * @info:	An arbitrary pointer to pass to both functions.
 * @wait:	If true, wait (atomically) until function has
 *		completed on other CPUs.
 * @gfp_flags:	GFP flags to use when allocating the cpumask
 *		used internally by the function.
 *
 * The function might sleep if the GFP flags indicate a non
 * atomic allocation is allowed.
 *
 * Preemption is disabled to protect against CPUs going offline but not online.
 * CPUs going online during the call will not be seen or sent an IPI.
 *
 * You must not call this function with disabled interrupts or
 * from a hardware interrupt handler or from a bottom half handler.
 */
void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
		      smp_call_func_t func, void *info, bool wait,
		      gfp_t gfp_flags)
{
	cpumask_var_t cpus;
	int cpu, ret;

	might_sleep_if(gfpflags_allow_blocking(gfp_flags));

	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
		preempt_disable();
		for_each_online_cpu(cpu)
			if (cond_func(cpu, info))
				cpumask_set_cpu(cpu, cpus);
		on_each_cpu_mask(cpus, func, info, wait);
		preempt_enable();
		free_cpumask_var(cpus);
	} else {
		/*
		 * No free cpumask, bother. No matter, we'll
		 * just have to IPI them one by one.
		 */
		preempt_disable();
		for_each_online_cpu(cpu)
			if (cond_func(cpu, info)) {
				ret = smp_call_function_single(cpu, func,
							       info, wait);
				WARN_ON_ONCE(ret);
			}
		preempt_enable();
	}
}
EXPORT_SYMBOL(on_each_cpu_cond);
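
/*
 * Illustrative usage (a sketch, not part of this file): only IPI CPUs that
 * actually have pending work, as a flush path might. has_work(), do_flush()
 * and the per-CPU "pending" list are hypothetical.
 *
 *	static bool has_work(int cpu, void *info)
 *	{
 *		return !llist_empty(&per_cpu(pending, cpu));
 *	}
 *
 *	on_each_cpu_cond(has_work, do_flush, NULL, true, GFP_KERNEL);
 *
 * The predicate runs with preemption disabled and must not sleep; the
 * GFP_KERNEL here only governs the internal cpumask allocation, which
 * happens before preemption is disabled.
 */
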
static void do_nothing(void *unused)
{
}

/**
 * kick_all_cpus_sync - Force all cpus out of idle
 *
 * Used to synchronize the update of pm_idle function pointer. It's
 * called after the pointer is updated and returns after the dummy
 * callback function has been executed on all cpus. The execution of
 * the function can only happen on the remote cpus after they have
 * left the idle function which had been called via pm_idle function
 * pointer. So it's guaranteed that nothing uses the previous pointer
 * anymore.
 */
void kick_all_cpus_sync(void)
{
	/* Make sure the change is visible before we kick the cpus */
	smp_mb();
	smp_call_function(do_nothing, NULL, 1);
}
EXPORT_SYMBOL_GPL(kick_all_cpus_sync);
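
/*
 * Illustrative usage (a sketch, not part of this file): the pattern served
 * here is publish-then-kick, e.g. for an idle-routine pointer:
 *
 *	WRITE_ONCE(pm_idle, new_idle_fn);	// publish the new pointer
 *	kick_all_cpus_sync();			// no CPU still runs the old one
 *
 * The synchronous do_nothing() IPI only returns once every CPU has taken
 * the interrupt, i.e. after leaving any idle routine entered through the
 * old pointer.
 */
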
/**
 * wake_up_all_idle_cpus - break all cpus out of idle
 * wake_up_all_idle_cpus tries to break all cpus which are in idle state,
 * including cpus doing idle polling; cpus that are not idle are left
 * alone.
 */
void wake_up_all_idle_cpus(void)
{
	int cpu;

	preempt_disable();
	for_each_online_cpu(cpu) {
		if (cpu == smp_processor_id())
			continue;

		if (s2idle_state == S2IDLE_STATE_ENTER ||
		    !cpu_isolated(cpu))
			wake_up_if_idle(cpu);
	}
	preempt_enable();
}
EXPORT_SYMBOL_GPL(wake_up_all_idle_cpus);

/**
 * smp_call_on_cpu - Call a function on a specific cpu
 *
 * Used to call a function on a specific cpu and wait for it to return.
 * Optionally make sure the call is done on a specified physical cpu via vcpu
 * pinning in order to support virtualized environments.
 */
struct smp_call_on_cpu_struct {
	struct work_struct	work;
	struct completion	done;
	int			(*func)(void *);
	void			*data;
	int			ret;
	int			cpu;
};

static void smp_call_on_cpu_callback(struct work_struct *work)
{
	struct smp_call_on_cpu_struct *sscs;

	sscs = container_of(work, struct smp_call_on_cpu_struct, work);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(sscs->cpu);
	sscs->ret = sscs->func(sscs->data);
	if (sscs->cpu >= 0)
		hypervisor_pin_vcpu(-1);

	complete(&sscs->done);
}

int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
	struct smp_call_on_cpu_struct sscs = {
		.done = COMPLETION_INITIALIZER_ONSTACK(sscs.done),
		.func = func,
		.data = par,
		.cpu  = phys ? cpu : -1,
	};

	INIT_WORK_ONSTACK(&sscs.work, smp_call_on_cpu_callback);

	if (cpu >= nr_cpu_ids || !cpu_online(cpu))
		return -ENXIO;

	queue_work_on(cpu, system_wq, &sscs.work);
	wait_for_completion(&sscs.done);
	destroy_work_on_stack(&sscs.work);

	return sscs.ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);
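
/*
 * Illustrative usage (a sketch, not part of this file): unlike the IPI-based
 * calls above, which need fast non-blocking callbacks, the function here may
 * sleep because it runs from the target CPU's workqueue. read_temp() is
 * hypothetical.
 *
 *	static int read_temp(void *data)
 *	{
 *		msleep(1);		// sleeping is fine in a workqueue
 *		*(int *)data = 42;	// hypothetical sensor read
 *		return 0;
 *	}
 *
 *	int temp;
 *	int err = smp_call_on_cpu(3, read_temp, &temp, false);
 *
 * With @phys set, the vCPU is additionally pinned for hypervisors that
 * implement hypervisor_pin_vcpu().
 */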